diff mbox series

[RFC,v3,28/37] WIP: bpf: Add fuse_ops struct_op programs

Message ID 20230418014037.2412394-29-drosen@google.com (mailing list archive)
State Mainlined, archived
Headers show
Series FUSE BPF: A Stacked Filesystem Extension for FUSE | expand

Commit Message

Daniel Rosenberg April 18, 2023, 1:40 a.m. UTC
This introduces a new struct_op type: fuse_ops. This program set
provides pre and post filters to run around fuse-bpf calls that act
directly on the lower filesystem.

The inputs are either fixed structures, or struct fuse_buffer's.

These programs are not permitted to make any changes to these fuse_buffers
unless they create a dynptr wrapper using the supplied kfunc helpers.

Fuse_buffers maintain additional state information that FUSE uses to
manage memory and determine if additional set up or checks are needed.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 include/linux/bpf_fuse.h          | 189 +++++++++++++++++++++++
 kernel/bpf/Makefile               |   4 +
 kernel/bpf/bpf_fuse.c             | 241 ++++++++++++++++++++++++++++++
 kernel/bpf/bpf_struct_ops_types.h |   4 +
 kernel/bpf/btf.c                  |   1 +
 kernel/bpf/verifier.c             |   9 ++
 6 files changed, 448 insertions(+)
 create mode 100644 kernel/bpf/bpf_fuse.c

Comments

Andrii Nakryiko April 27, 2023, 4:18 a.m. UTC | #1
On Mon, Apr 17, 2023 at 6:42 PM Daniel Rosenberg <drosen@google.com> wrote:
>
> This introduces a new struct_op type: fuse_ops. This program set
> provides pre and post filters to run around fuse-bpf calls that act
> directly on the lower filesystem.
>
> The inputs are either fixed structures, or struct fuse_buffer's.
>
> These programs are not permitted to make any changes to these fuse_buffers
> unless they create a dynptr wrapper using the supplied kfunc helpers.
>
> Fuse_buffers maintain additional state information that FUSE uses to
> manage memory and determine if additional set up or checks are needed.
>
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
>  include/linux/bpf_fuse.h          | 189 +++++++++++++++++++++++
>  kernel/bpf/Makefile               |   4 +
>  kernel/bpf/bpf_fuse.c             | 241 ++++++++++++++++++++++++++++++
>  kernel/bpf/bpf_struct_ops_types.h |   4 +
>  kernel/bpf/btf.c                  |   1 +
>  kernel/bpf/verifier.c             |   9 ++
>  6 files changed, 448 insertions(+)
>  create mode 100644 kernel/bpf/bpf_fuse.c
>
> diff --git a/include/linux/bpf_fuse.h b/include/linux/bpf_fuse.h
> index ce8b1b347496..780a7889aea2 100644
> --- a/include/linux/bpf_fuse.h
> +++ b/include/linux/bpf_fuse.h
> @@ -30,6 +30,8 @@ struct fuse_buffer {
>  #define BPF_FUSE_MODIFIED      (1 << 3) // The helper function allowed writes to the buffer
>  #define BPF_FUSE_ALLOCATED     (1 << 4) // The helper function allocated the buffer
>
> +extern void *bpf_fuse_get_writeable(struct fuse_buffer *arg, u64 size, bool copy);
> +
>  /*
>   * BPF Fuse Args
>   *
> @@ -81,4 +83,191 @@ static inline unsigned bpf_fuse_arg_size(const struct bpf_fuse_arg *arg)
>         return arg->is_buffer ? arg->buffer->size : arg->size;
>  }
>
> +struct fuse_ops {
> +       uint32_t (*open_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_open_in *in);
> +       uint32_t (*open_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_open_in *in,
> +                               struct fuse_open_out *out);
> +
> +       uint32_t (*opendir_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_open_in *in);
> +       uint32_t (*opendir_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_open_in *in,
> +                               struct fuse_open_out *out);
> +
> +       uint32_t (*create_open_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_create_in *in, struct fuse_buffer *name);
> +       uint32_t (*create_open_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_create_in *in, const struct fuse_buffer *name,
> +                               struct fuse_entry_out *entry_out, struct fuse_open_out *out);
> +
> +       uint32_t (*release_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_release_in *in);
> +       uint32_t (*release_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_release_in *in);
> +
> +       uint32_t (*releasedir_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_release_in *in);
> +       uint32_t (*releasedir_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_release_in *in);
> +
> +       uint32_t (*flush_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_flush_in *in);
> +       uint32_t (*flush_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_flush_in *in);
> +
> +       uint32_t (*lseek_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_lseek_in *in);
> +       uint32_t (*lseek_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_lseek_in *in,
> +                               struct fuse_lseek_out *out);
> +
> +       uint32_t (*copy_file_range_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_copy_file_range_in *in);
> +       uint32_t (*copy_file_range_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_copy_file_range_in *in,
> +                               struct fuse_write_out *out);
> +
> +       uint32_t (*fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_fsync_in *in);
> +       uint32_t (*fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_fsync_in *in);
> +
> +       uint32_t (*dir_fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_fsync_in *in);
> +       uint32_t (*dir_fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_fsync_in *in);
> +
> +       uint32_t (*getxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_getxattr_in *in, struct fuse_buffer *name);
> +       // if in->size > 0, use value. If in->size == 0, use out.
> +       uint32_t (*getxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_getxattr_in *in, const struct fuse_buffer *name,
> +                               struct fuse_buffer *value, struct fuse_getxattr_out *out);
> +
> +       uint32_t (*listxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_getxattr_in *in);
> +       // if in->size > 0, use value. If in->size == 0, use out.
> +       uint32_t (*listxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_getxattr_in *in,
> +                               struct fuse_buffer *value, struct fuse_getxattr_out *out);
> +
> +       uint32_t (*setxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_setxattr_in *in, struct fuse_buffer *name,
> +                                       struct fuse_buffer *value);
> +       uint32_t (*setxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_setxattr_in *in, const struct fuse_buffer *name,
> +                                       const struct fuse_buffer *value);
> +
> +       uint32_t (*removexattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name);
> +       uint32_t (*removexattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name);
> +
> +       /* Read and Write iter will likely undergo some sort of change/addition to handle changing
> +        * the data buffer passed in/out. */
> +       uint32_t (*read_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_read_in *in);
> +       uint32_t (*read_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_read_in *in,
> +                               struct fuse_read_iter_out *out);
> +
> +       uint32_t (*write_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_write_in *in);
> +       uint32_t (*write_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_write_in *in,
> +                               struct fuse_write_iter_out *out);
> +
> +       uint32_t (*file_fallocate_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_fallocate_in *in);
> +       uint32_t (*file_fallocate_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_fallocate_in *in);
> +
> +       uint32_t (*lookup_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name);
> +       uint32_t (*lookup_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name,
> +                               struct fuse_entry_out *out, struct fuse_buffer *entries);
> +
> +       uint32_t (*mknod_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_mknod_in *in, struct fuse_buffer *name);
> +       uint32_t (*mknod_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_mknod_in *in, const struct fuse_buffer *name);
> +
> +       uint32_t (*mkdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_mkdir_in *in, struct fuse_buffer *name);
> +       uint32_t (*mkdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_mkdir_in *in, const struct fuse_buffer *name);
> +
> +       uint32_t (*rmdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name);
> +       uint32_t (*rmdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name);
> +
> +       uint32_t (*rename2_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_rename2_in *in, struct fuse_buffer *old_name,
> +                               struct fuse_buffer *new_name);
> +       uint32_t (*rename2_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_rename2_in *in, const struct fuse_buffer *old_name,
> +                               const struct fuse_buffer *new_name);
> +
> +       uint32_t (*rename_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_rename_in *in, struct fuse_buffer *old_name,
> +                               struct fuse_buffer *new_name);
> +       uint32_t (*rename_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_rename_in *in, const struct fuse_buffer *old_name,
> +                               const struct fuse_buffer *new_name);
> +
> +       uint32_t (*unlink_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name);
> +       uint32_t (*unlink_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name);
> +
> +       uint32_t (*link_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_link_in *in, struct fuse_buffer *name);
> +       uint32_t (*link_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_link_in *in, const struct fuse_buffer *name);
> +
> +       uint32_t (*getattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_getattr_in *in);
> +       uint32_t (*getattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_getattr_in *in,
> +                               struct fuse_attr_out *out);
> +
> +       uint32_t (*setattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_setattr_in *in);
> +       uint32_t (*setattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_setattr_in *in,
> +                               struct fuse_attr_out *out);
> +
> +       uint32_t (*statfs_prefilter)(const struct bpf_fuse_meta_info *meta);
> +       uint32_t (*statfs_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_statfs_out *out);
> +
> +       //TODO: This does not allow doing anything with path
> +       uint32_t (*get_link_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name);
> +       uint32_t (*get_link_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name);
> +
> +       uint32_t (*symlink_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_buffer *name, struct fuse_buffer *path);
> +       uint32_t (*symlink_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_buffer *name, const struct fuse_buffer *path);
> +
> +       uint32_t (*readdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_read_in *in);
> +       uint32_t (*readdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_read_in *in,
> +                               struct fuse_read_out *out, struct fuse_buffer *buffer);
> +
> +       uint32_t (*access_prefilter)(const struct bpf_fuse_meta_info *meta,
> +                               struct fuse_access_in *in);
> +       uint32_t (*access_postfilter)(const struct bpf_fuse_meta_info *meta,
> +                               const struct fuse_access_in *in);
> +
> +       char name[BPF_FUSE_NAME_MAX];
> +};

Have you considered grouping this huge amount of callbacks into a
smaller set of more generic callbacks where each callback would get
enum argument specifying what sort of operation it is called for? This
has many advantages, starting from not having to deal with struct_ops
limits, ending with not needing to instantiate dozens of individual
BPF programs.

E.g., for a lot of operations the difference between pre- and
post-filter is in having in argument as read-only and maybe having
extra out argument for post-filter. One way to unify such post/pre
filters into one callback would be to record whether in has to be
read-only  or read-write and not allow to create r/w dynptr for the
former case. Pass bool or enum specifying if it's post or pre filter.
For that optional out argument, you can simulate effectively the same
by always supplying it, but making sure that out parameter is
read-only and zero-sized, for example.

That would cut the number of callbacks in two, which I'd say still is
not great :) I think it would be better still to have even larger
groups of callbacks for whole families of operations with the same (or
"unifiable") interface (domain experts like you would need to do an
analysis here to see what makes sense to group, of course).

We'll probably touch on that tomorrow at BPF office hours, but I
wanted to point this out beforehand, so that you have time to think
about it.

> +
>  #endif /* _BPF_FUSE_H */
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 1d3892168d32..26a2e741ef61 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile

[...]

> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> +                  "Global kfuncs as their definitions will be in BTF");
> +void bpf_fuse_get_rw_dynptr(struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit, u64 size, bool copy)

not clear why size is passed from outside instead of instantiating
dynptr with buffer->size? See [0] for bpf_dynptr_adjust and
bpf_dynptr_clone that allow you to adjust buffer as necessary.

As for the copy parameter, can you elaborate on the idea behind it?

  [0] https://patchwork.kernel.org/project/netdevbpf/list/?series=741584&state=*

> +{
> +       buffer->data = bpf_fuse_get_writeable(buffer, size, copy);
> +       bpf_dynptr_init(dynptr__uninit, buffer->data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
> +}
> +
> +void bpf_fuse_get_ro_dynptr(const struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit)

these kfuncs probably should be more consistently named as
bpf_dynptr_from_fuse_buffer_{ro,rw}() ?

> +{
> +       bpf_dynptr_init(dynptr__uninit, buffer->data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
> +       bpf_dynptr_set_rdonly(dynptr__uninit);
> +}
> +
> +uint32_t bpf_fuse_return_len(struct fuse_buffer *buffer)
> +{
> +       return buffer->size;

you should be able to get this with bpf_dynptr_size() (once you create
it from fuse_buffer).

> +}
> +__diag_pop();
> +BTF_SET8_START(fuse_kfunc_set)
> +BTF_ID_FLAGS(func, bpf_fuse_get_rw_dynptr)
> +BTF_ID_FLAGS(func, bpf_fuse_get_ro_dynptr)
> +BTF_ID_FLAGS(func, bpf_fuse_return_len)
> +BTF_SET8_END(fuse_kfunc_set)
> +
> +static const struct btf_kfunc_id_set bpf_fuse_kfunc_set = {
> +       .owner = THIS_MODULE,
> +       .set = &fuse_kfunc_set,
> +};
> +
> +static int __init bpf_fuse_kfuncs_init(void)
> +{
> +       return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
> +                                        &bpf_fuse_kfunc_set);
> +}
> +
> +late_initcall(bpf_fuse_kfuncs_init);
> +
> +static const struct bpf_func_proto *bpf_fuse_get_func_proto(enum bpf_func_id func_id,
> +                                                             const struct bpf_prog *prog)
> +{
> +       switch (func_id) {
> +       default:
> +               return bpf_base_func_proto(func_id);
> +       }
> +}
> +
> +static bool bpf_fuse_is_valid_access(int off, int size,
> +                                   enum bpf_access_type type,
> +                                   const struct bpf_prog *prog,
> +                                   struct bpf_insn_access_aux *info)
> +{
> +       return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
> +}
> +
> +const struct btf_type *fuse_buffer_struct_type;
> +
> +static int bpf_fuse_btf_struct_access(struct bpf_verifier_log *log,
> +                                       const struct bpf_reg_state *reg,
> +                                       int off, int size)
> +{
> +       const struct btf_type *t;
> +
> +       t = btf_type_by_id(reg->btf, reg->btf_id);
> +       if (t == fuse_buffer_struct_type) {
> +               bpf_log(log,
> +                       "direct access to fuse_buffer is disallowed\n");
> +               return -EACCES;
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct bpf_verifier_ops bpf_fuse_verifier_ops = {
> +       .get_func_proto         = bpf_fuse_get_func_proto,

you probably should be fine with just using bpf_tracing_func_proto as is

> +       .is_valid_access        = bpf_fuse_is_valid_access,

similarly, why custom no-op callback?

> +       .btf_struct_access      = bpf_fuse_btf_struct_access,
> +};
> +
> +static int bpf_fuse_check_member(const struct btf_type *t,
> +                                  const struct btf_member *member,
> +                                  const struct bpf_prog *prog)
> +{
> +       //if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
> +       //      return -ENOTSUPP;
> +       return 0;
> +}
> +
> +static int bpf_fuse_init_member(const struct btf_type *t,
> +                                 const struct btf_member *member,
> +                                 void *kdata, const void *udata)
> +{
> +       const struct fuse_ops *uf_ops;
> +       struct fuse_ops *f_ops;
> +       u32 moff;
> +
> +       uf_ops = (const struct fuse_ops *)udata;
> +       f_ops = (struct fuse_ops *)kdata;
> +
> +       moff = __btf_member_bit_offset(t, member) / 8;
> +       switch (moff) {
> +       case offsetof(struct fuse_ops, name):
> +               if (bpf_obj_name_cpy(f_ops->name, uf_ops->name,
> +                                    sizeof(f_ops->name)) <= 0)
> +                       return -EINVAL;
> +               //if (tcp_ca_find(utcp_ca->name))
> +               //      return -EEXIST;
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static int bpf_fuse_init(struct btf *btf)
> +{
> +       s32 type_id;
> +
> +       type_id = btf_find_by_name_kind(btf, "fuse_buffer", BTF_KIND_STRUCT);
> +       if (type_id < 0)
> +               return -EINVAL;
> +       fuse_buffer_struct_type = btf_type_by_id(btf, type_id);
> +

see BTF_ID and BTF_ID_LIST uses for how to get ID for your custom
well-known type

> +       return 0;
> +}
> +
> +static struct bpf_fuse_ops_attach *fuse_reg = NULL;
> +

[...]
Daniel Rosenberg May 3, 2023, 1:53 a.m. UTC | #2
On Wed, Apr 26, 2023 at 9:18 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> Have you considered grouping this huge amount of callbacks into a
> smaller set of more generic callbacks where each callback would get
> enum argument specifying what sort of operation it is called for? This
> has many advantages, starting from not having to deal with struct_ops
> limits, ending with not needing to instantiate dozens of individual
> BPF programs.
>
> E.g., for a lot of operations the difference between pre- and
> post-filter is in having in argument as read-only and maybe having
> extra out argument for post-filter. One way to unify such post/pre
> filters into one callback would be to record whether in has to be
> read-only  or read-write and not allow to create r/w dynptr for the
> former case. Pass bool or enum specifying if it's post or pre filter.
> For that optional out argument, you can simulate effectively the same
> by always supplying it, but making sure that out parameter is
> read-only and zero-sized, for example.
>
> That would cut the number of callbacks in two, which I'd say still is
> not great :) I think it would be better still to have even larger
> groups of callbacks for whole families of operations with the same (or
> "unifiable") interface (domain experts like you would need to do an
> analysis here to see what makes sense to group, of course).
>
> We'll probably touch on that tomorrow at BPF office hours, but I
> wanted to point this out beforehand, so that you have time to think
> about it.
>

The meta info struct we pass in includes the opcode which contains
whether it is a prefilter or postfilter, although I guess that may be
less accessible to the verifier than a separate bool. In the v1
version, we handled all op codes in a single program, although I think
we were running into some slowdowns when we had every opcode in a
giant switch statement, plus we were incurring the cost of the bpf
program even when we didn't need to do anything in it. The struct_op
version lets us entirely skip calling the bpf for opcodes we don't
need to handle.

Many of the arguments we pass currently are structs. If they were all
dynptrs, we could set the output related ones to empty/readonly, but
that removes one of the other strengths of the struct_op setup, where
we can actually label the inputs as the structs they are instead of a
void* equivalent. There are definitely some cases where we could
easily merge opcode callbacks, like FUSE_FSYNCDIR/FUSE_FSYNC and
FUSE_OPEN/FUSE_OPENDIR. I set them up as separate since it's easy to
assign the same program to both callbacks in the case where you want
both to be handled the same, while maintaining flexibility to handle
them separately.

> +void bpf_fuse_get_rw_dynptr(struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit, u64 size, bool copy)
>
> not clear why size is passed from outside instead of instantiating
> dynptr with buffer->size? See [0] for bpf_dynptr_adjust and
> bpf_dynptr_clone that allow you to adjust buffer as necessary.
>
> As for the copy parameter, can you elaborate on the idea behind it?
>
>   [0] https://patchwork.kernel.org/project/netdevbpf/list/?series=741584&state=*
>

We're storing these buffers as fuse_buffers initially because of the
additional metadata we're carrying. Some fields have variable lengths,
or are backed by const data. For instance, names. If you wanted to
alter the name we use on the lower filesystem, you cannot change it
directly since it's being backed by the dentry name. If you wanted to
adjust something, like perhaps adding an extension, you would pass
bpf_fuse_get_rw_dynptr the size you'd want for the new buffer, and
copy=true to get the preexisting data. Fuse_buffer tracks that data
was allocated so Fuse can clean up after the call. Additionally, say
you wanted to trim half the data returned by an xattr for some reason.
You would give it a size less than the buffer size to inform fuse that
it should ignore the second half of the data. That part could be
handled by bpf_dynptr_adjust if we didn't also need to handle the
allocation case.
Say you wanted to have the lower file name be the hash of the one you
created. In that case, you could call bpf_fuse_get_ro_dynptr to get
read access for computing the hash, and then bpf_fuse_get_rw_dynptr
(with copy=false) to get a buffer to write the hash to. Since the new
data is not directly related to the original data, there would be no
benefit to getting a copy.

I initially intended for bpf_fuse_get_ro_dynptr/bpf_fuse_get_rw_dynptr
to be called at most once for each field, but that may be too
restrictive. At the moment, if you make two calls that require
reallocating, any pointers to the old buffer would be invalid. This is
not the case for the original name, as we aren't touching the original
source. There are two possible approaches here. I could either
refcount the buffer and have a put kfunc, or I could invalidate old
dynptrs when bpf_fuse_get_rw_dynptr is called, similar to what
skb/xdp do. I'm leaning towards the latter to disallow having many
allocations active at once by calling bpf_fuse_get_rw_dynptr for
increasing sizes, though I could also just disallow reallocating a
buffer that already was reallocated.

The new dynptr helpers are pretty exciting since they'll make it much
easier to deal with chunks of data, which we may end up doing in
read/write filters. I haven't fully set those up since I was waiting
to see what the dynptr helpers ended up looking like.


> > +void bpf_fuse_get_ro_dynptr(const struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit)
>
> these kfuncs probably should be more consistently named as
> bpf_dynptr_from_fuse_buffer_{ro,rw}() ?
>
Yeah, that fits in much better with the skb/xdp functions.

> > +
> > +uint32_t bpf_fuse_return_len(struct fuse_buffer *buffer)
> > +{
> > +       return buffer->size;
>
> you should be able to get this with bpf_dynptr_size() (once you create
> it from fuse_buffer).
>

Yes, this might be unnecessary. I added it while testing kfuncs, and
had intended to use it with a fuse_buffer strncmp before I saw that
there's now a bpf_strncmp :) I had tried using it with
bpf_dynptr_slice, but that requires a known constant at verification
time, which may make using it in real cases a bit difficult...
bpf_strncmp also has some restrictions around the second string being
a fixed map, or something like that.

>
> you probably should be fine with just using bpf_tracing_func_proto as is
>
> > +       .is_valid_access        = bpf_fuse_is_valid_access,
>
> similarly, why custom no-op callback?
>

Those are largely carried over from iterations when I was less sure
what I would need. A lot of the work I was doing in the v1 code is
handled by default with the struct_op setup now, or is otherwise
unnecessary. This area in particular needs a lot of cleanup.

> > +static int bpf_fuse_init(struct btf *btf)
> > +{
> > +       s32 type_id;
> > +
> > +       type_id = btf_find_by_name_kind(btf, "fuse_buffer", BTF_KIND_STRUCT);
> > +       if (type_id < 0)
> > +               return -EINVAL;
> > +       fuse_buffer_struct_type = btf_type_by_id(btf, type_id);
> > +
>
> see BTF_ID and BTF_ID_LIST uses for how to get ID for your custom
> well-known type
>
Thanks, I'll look into those.
Andrii Nakryiko May 3, 2023, 6:22 p.m. UTC | #3
On Tue, May 2, 2023 at 6:53 PM Daniel Rosenberg <drosen@google.com> wrote:
>
> On Wed, Apr 26, 2023 at 9:18 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > Have you considered grouping this huge amount of callbacks into a
> > smaller set of more generic callbacks where each callback would get
> > enum argument specifying what sort of operation it is called for? This
> > has many advantages, starting from not having to deal with struct_ops
> > limits, ending with not needing to instantiate dozens of individual
> > BPF programs.
> >
> > E.g., for a lot of operations the difference between pre- and
> > post-filter is in having in argument as read-only and maybe having
> > extra out argument for post-filter. One way to unify such post/pre
> > filters into one callback would be to record whether in has to be
> > read-only  or read-write and not allow to create r/w dynptr for the
> > former case. Pass bool or enum specifying if it's post or pre filter.
> > For that optional out argument, you can simulate effectively the same
> > by always supplying it, but making sure that out parameter is
> > read-only and zero-sized, for example.
> >
> > That would cut the number of callbacks in two, which I'd say still is
> > not great :) I think it would be better still to have even larger
> > groups of callbacks for whole families of operations with the same (or
> > "unifiable") interface (domain experts like you would need to do an
> > analysis here to see what makes sense to group, of course).
> >
> > We'll probably touch on that tomorrow at BPF office hours, but I
> > wanted to point this out beforehand, so that you have time to think
> > about it.
> >
>
> The meta info struct we pass in includes the opcode which contains
> whether it is a prefilter or postfilter, although I guess that may be
> less accessible to the verifier than a separate bool. In the v1
> version, we handled all op codes in a single program, although I think
> we were running into some slowdowns when we had every opcode in a
> giant switch statement, plus we were incurring the cost of the bpf
> program even when we didn't need to do anything in it. The struct_op
> version lets us entirely skip calling the bpf for opcodes we don't
> need to handle.
>
> Many of the arguments we pass currently are structs. If they were all
> dynptrs, we could set the output related ones to empty/readonly, but
> that removes one of the other strengths of the struct_op setup, where
> we can actually label the inputs as the structs they are instead of a
> void* equivalent. There are definitely some cases where we could
> easily merge opcode callbacks, like FUSE_FSYNCDIR/FUSE_FSYNC and
> FUSE_OPEN/FUSE_OPENDIR. I set them up as separate since it's easy to
> assign the same program to both callbacks in the case where you want
> both to be handled the same, while maintaining flexibility to handle
> them separately.

If combining hooks doesn't bring any value or simplification, I think
it's fine to keep it as is. I was mostly probing whether there is an
equally convenient, but more succinct, API that could be exposed
through struct_ops. If there is none, then it's fine.

>
> > +void bpf_fuse_get_rw_dynptr(struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit, u64 size, bool copy)
> >
> > not clear why size is passed from outside instead of instantiating
> > dynptr with buffer->size? See [0] for bpf_dynptr_adjust and
> > bpf_dynptr_clone that allow you to adjust buffer as necessary.
> >
> > As for the copy parameter, can you elaborate on the idea behind it?
> >
> >   [0] https://patchwork.kernel.org/project/netdevbpf/list/?series=741584&state=*
> >
>
> We're storing these buffers as fuse_buffers initially because of the
> additional metadata we're carrying. Some fields have variable lengths,
> or are backed by const data. For instance, names. If you wanted to
> alter the name we use on the lower filesystem, you cannot change it
> directly since it's being backed by the dentry name. If you wanted to
> adjust something, like perhaps adding an extension, you would pass
> bpf_fuse_get_rw_dynptr the size you'd want for the new buffer, and
> copy=true to get the preexisting data. Fuse_buffer tracks that data
> was allocated so Fuse can clean up after the call. Additionally, say
> you wanted to trim half the data returned by an xattr for some reason.
> You would give it a size less than the buffer size to inform fuse that
> it should ignore the second half of the data. That part could be
> handled by bpf_dynptr_adjust if we didn't also need to handle the
> allocation case.

Interesting point about allocations and needing to realloc names. But
I wonder if it makes more sense to split out the copy/reallocation part
and do it with a separate kfunc, leaving the dynptr only as a means to
work with that data. So you'd do something like below for the read/write case:

bpf_fuse_buf_clone(&buffer, new_size);
bpf_fuse_dynptr_from_buf_rw(&buffer, &dynptr);

But would skip bpf_fuse_buf_clone() if you only ever read:

bpf_fuse_dynptr_from_buf_ro(&buffer, &dynptr);

If fuse_buffer was never cloned/realloced, then
bpf_fuse_dynptr_from_buf_rw() should just fail and return invalid
dynptr.


> Say you wanted to have the lower file name be the hash of the one you
> created. In that case, you could call bpf_fuse_get_ro_dynptr to get
> read access to compute the hash, and then bpf_fuse_get_rw_dynptr to get a
> buffer to write the hash to. Since the data is not directly related to
> the original data, there would be no benefit to getting a copy.
>
> I initially intended for bpf_fuse_get_ro_dynptr/bpf_fuse_get_rw_dynptr
> to be called at most once for each field, but that may be too
> restrictive. At the moment, if you make two calls that require
> reallocating, any pointers to the old buffer would be invalid. This is
> not the case for the original name, as we aren't touching the original
> source. There are two possible approaches here. I could either
> refcount the buffer and have a put kfunc, or I could invalidate old
> dynptrs when bpf_fuse_get_rw_dynptr is called, similar to what
> skb/xdp do. I'm leaning towards the latter to disallow having many
> allocations active at once by calling bpf_fuse_get_rw_dynptr for
> increasing sizes, though I could also just disallow reallocating a
> buffer that already was reallocated.

Yes, invalidating dynptrs sounds like the way to go. But I think instead
of bundling all that into dynptr constructor for fuse_buffer, it's
better to have a separate kfunc that would be doing realloc/cloning
*and* invalidating. Other than that, neither from_buf_rw nor
from_buf_ro should be doing invalidation, because they can't cause
realloc. WDYT?

>
> The new dynptr helpers are pretty exciting since they'll make it much
> easier to deal with chunks of data, which we may end up doing in
> read/write filters. I haven't fully set those up since I was waiting
> to see what the dynptr helpers ended up looking like.
>

Great, let us know how it goes in practice to start using them.

>
> > > +void bpf_fuse_get_ro_dynptr(const struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit)
> >
> > these kfuncs probably should be more consistently named as
> > bpf_dynptr_from_fuse_buffer_{ro,rw}() ?
> >
> Yeah, that fits in much better with the skb/xdp functions.

great

>
> > > +
> > > +uint32_t bpf_fuse_return_len(struct fuse_buffer *buffer)
> > > +{
> > > +       return buffer->size;
> >
> > you should be able to get this with bpf_dynptr_size() (once you create
> > it from fuse_buffer).
> >
>
> Yes, this might be unnecessary. I added it while testing kfuncs, and
> had intended to use it with a fuse_buffer strncmp before I saw that
> there's now a bpf_strncmp :) I had tried using it with
> bpf_dynptr_slice, but that requires a known constant at verification
> time, which may make using it in real cases a bit difficult...
> bpf_strncmp also has some restrictions around the second string being
> a fixed map, or something like that.

right, we might need a more flexible strncmp version working with two
dynptrs and not assuming a fixed string. We didn't have dynptr
abstraction for working with variable-sized memory when we were adding
bpf_strncmp.

>
> >
> > you probably should be fine with just using bpf_tracing_func_proto as is
> >
> > > +       .is_valid_access        = bpf_fuse_is_valid_access,
> >
> > similarly, why custom no-op callback?
> >
>
> Those are largely carried over from iterations when I was less sure
> what I would need. A lot of the work I was doing in the v1 code is
> handled by default with the struct_op setup now, or is otherwise
> unnecessary. This area in particular needs a lot of cleanup.
>

ok

> > > +static int bpf_fuse_init(struct btf *btf)
> > > +{
> > > +       s32 type_id;
> > > +
> > > +       type_id = btf_find_by_name_kind(btf, "fuse_buffer", BTF_KIND_STRUCT);
> > > +       if (type_id < 0)
> > > +               return -EINVAL;
> > > +       fuse_buffer_struct_type = btf_type_by_id(btf, type_id);
> > > +
> >
> > see BTF_ID and BTF_ID_LIST uses for how to get ID for your custom
> > well-known type
> >
> Thanks, I'll look into those.
diff mbox series

Patch

diff --git a/include/linux/bpf_fuse.h b/include/linux/bpf_fuse.h
index ce8b1b347496..780a7889aea2 100644
--- a/include/linux/bpf_fuse.h
+++ b/include/linux/bpf_fuse.h
@@ -30,6 +30,8 @@  struct fuse_buffer {
 #define BPF_FUSE_MODIFIED	(1 << 3) // The helper function allowed writes to the buffer
 #define BPF_FUSE_ALLOCATED	(1 << 4) // The helper function allocated the buffer
 
+extern void *bpf_fuse_get_writeable(struct fuse_buffer *arg, u64 size, bool copy);
+
 /*
  * BPF Fuse Args
  *
@@ -81,4 +83,191 @@  static inline unsigned bpf_fuse_arg_size(const struct bpf_fuse_arg *arg)
 	return arg->is_buffer ? arg->buffer->size : arg->size;
 }
 
+/*
+ * struct fuse_ops - callback table for the "fuse_ops" struct_ops type.
+ *
+ * Every operation has a pair of callbacks, <op>_prefilter and
+ * <op>_postfilter. All callbacks take a const struct bpf_fuse_meta_info
+ * first. A prefilter takes the operation's request arguments as mutable
+ * pointers; the matching postfilter takes those same arguments as const and
+ * additionally takes any reply ("out") structures or buffers as mutable.
+ *
+ * Variable-length arguments are passed as struct fuse_buffer.
+ *
+ * NOTE(review): the meaning of the uint32_t return value is not visible
+ * here -- confirm against the FUSE-side caller.
+ *
+ * @name: name of this program set, at most BPF_FUSE_NAME_MAX bytes.
+ */
+struct fuse_ops {
+	uint32_t (*open_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_open_in *in);
+	uint32_t (*open_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_open_in *in,
+				struct fuse_open_out *out);
+
+	uint32_t (*opendir_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_open_in *in);
+	uint32_t (*opendir_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_open_in *in,
+				struct fuse_open_out *out);
+
+	uint32_t (*create_open_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_create_in *in, struct fuse_buffer *name);
+	uint32_t (*create_open_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_create_in *in, const struct fuse_buffer *name,
+				struct fuse_entry_out *entry_out, struct fuse_open_out *out);
+
+	uint32_t (*release_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_release_in *in);
+	uint32_t (*release_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_release_in *in);
+
+	uint32_t (*releasedir_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_release_in *in);
+	uint32_t (*releasedir_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_release_in *in);
+
+	uint32_t (*flush_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_flush_in *in);
+	uint32_t (*flush_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_flush_in *in);
+
+	uint32_t (*lseek_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_lseek_in *in);
+	uint32_t (*lseek_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_lseek_in *in,
+				struct fuse_lseek_out *out);
+
+	uint32_t (*copy_file_range_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_copy_file_range_in *in);
+	uint32_t (*copy_file_range_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_copy_file_range_in *in,
+				struct fuse_write_out *out);
+
+	uint32_t (*fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_fsync_in *in);
+	uint32_t (*fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_fsync_in *in);
+
+	uint32_t (*dir_fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_fsync_in *in);
+	uint32_t (*dir_fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_fsync_in *in);
+
+	uint32_t (*getxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_getxattr_in *in, struct fuse_buffer *name);
+	// If in->size > 0, the result is in value. If in->size == 0, it is in out.
+	uint32_t (*getxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_getxattr_in *in, const struct fuse_buffer *name,
+				struct fuse_buffer *value, struct fuse_getxattr_out *out);
+
+	uint32_t (*listxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_getxattr_in *in);
+	// If in->size > 0, the result is in value. If in->size == 0, it is in out.
+	uint32_t (*listxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_getxattr_in *in,
+				struct fuse_buffer *value, struct fuse_getxattr_out *out);
+
+	uint32_t (*setxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_setxattr_in *in, struct fuse_buffer *name,
+					struct fuse_buffer *value);
+	uint32_t (*setxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_setxattr_in *in, const struct fuse_buffer *name,
+					const struct fuse_buffer *value);
+
+	uint32_t (*removexattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name);
+	uint32_t (*removexattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name);
+
+	/* Read and Write iter will likely undergo some sort of change/addition to handle changing
+	 * the data buffer passed in/out. */
+	uint32_t (*read_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_read_in *in);
+	uint32_t (*read_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_read_in *in,
+				struct fuse_read_iter_out *out);
+
+	uint32_t (*write_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_write_in *in);
+	uint32_t (*write_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_write_in *in,
+				struct fuse_write_iter_out *out);
+
+	uint32_t (*file_fallocate_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_fallocate_in *in);
+	uint32_t (*file_fallocate_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_fallocate_in *in);
+
+	uint32_t (*lookup_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name);
+	uint32_t (*lookup_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name,
+				struct fuse_entry_out *out, struct fuse_buffer *entries);
+
+	uint32_t (*mknod_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_mknod_in *in, struct fuse_buffer *name);
+	uint32_t (*mknod_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_mknod_in *in, const struct fuse_buffer *name);
+
+	uint32_t (*mkdir_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_mkdir_in *in, struct fuse_buffer *name);
+	uint32_t (*mkdir_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_mkdir_in *in, const struct fuse_buffer *name);
+
+	uint32_t (*rmdir_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name);
+	uint32_t (*rmdir_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name);
+
+	uint32_t (*rename2_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_rename2_in *in, struct fuse_buffer *old_name,
+				struct fuse_buffer *new_name);
+	uint32_t (*rename2_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_rename2_in *in, const struct fuse_buffer *old_name,
+				const struct fuse_buffer *new_name);
+
+	uint32_t (*rename_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_rename_in *in, struct fuse_buffer *old_name,
+				struct fuse_buffer *new_name);
+	uint32_t (*rename_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_rename_in *in, const struct fuse_buffer *old_name,
+				const struct fuse_buffer *new_name);
+
+	uint32_t (*unlink_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name);
+	uint32_t (*unlink_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name);
+
+	uint32_t (*link_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_link_in *in, struct fuse_buffer *name);
+	uint32_t (*link_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_link_in *in, const struct fuse_buffer *name);
+
+	uint32_t (*getattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_getattr_in *in);
+	uint32_t (*getattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_getattr_in *in,
+				struct fuse_attr_out *out);
+
+	uint32_t (*setattr_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_setattr_in *in);
+	uint32_t (*setattr_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_setattr_in *in,
+				struct fuse_attr_out *out);
+
+	uint32_t (*statfs_prefilter)(const struct bpf_fuse_meta_info *meta);
+	uint32_t (*statfs_postfilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_statfs_out *out);
+
+	//TODO: This does not allow doing anything with path
+	uint32_t (*get_link_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name);
+	uint32_t (*get_link_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name);
+
+	uint32_t (*symlink_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_buffer *name, struct fuse_buffer *path);
+	uint32_t (*symlink_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_buffer *name, const struct fuse_buffer *path);
+
+	uint32_t (*readdir_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_read_in *in);
+	uint32_t (*readdir_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_read_in *in,
+				struct fuse_read_out *out, struct fuse_buffer *buffer);
+
+	uint32_t (*access_prefilter)(const struct bpf_fuse_meta_info *meta,
+				struct fuse_access_in *in);
+	uint32_t (*access_postfilter)(const struct bpf_fuse_meta_info *meta,
+				const struct fuse_access_in *in);
+
+	char name[BPF_FUSE_NAME_MAX];
+};
+
 #endif /* _BPF_FUSE_H */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 1d3892168d32..26a2e741ef61 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -45,3 +45,7 @@  obj-$(CONFIG_BPF_PRELOAD) += preload/
 obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
 $(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE
 	$(call if_changed_rule,cc_o_c)
+
+ifeq ($(CONFIG_FUSE_BPF),y)
+obj-$(CONFIG_BPF_SYSCALL) += bpf_fuse.o
+endif
diff --git a/kernel/bpf/bpf_fuse.c b/kernel/bpf/bpf_fuse.c
new file mode 100644
index 000000000000..35125c1f8eef
--- /dev/null
+++ b/kernel/bpf/bpf_fuse.c
@@ -0,0 +1,241 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2021 Google LLC
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/bpf_fuse.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+
+/**
+ * bpf_fuse_get_writeable - obtain a writable data pointer for a fuse_buffer
+ * @arg:  buffer to make writable
+ * @size: number of bytes the caller wants to be able to write
+ * @copy: when a new allocation is made, copy the existing contents into it
+ *
+ * Reuses the current backing store when it is large enough; otherwise
+ * allocates a zeroed replacement with kzalloc(), frees the previous store if
+ * it carries BPF_FUSE_ALLOCATED, and installs the new one in @arg->data.
+ * BPF_FUSE_MODIFIED is set on every successful return.
+ *
+ * Return: the writable data pointer, or NULL if the buffer is
+ * BPF_FUSE_IMMUTABLE, @size is not permitted for this buffer, or the
+ * allocation fails. @arg is left untouched on failure.
+ */
+void *bpf_fuse_get_writeable(struct fuse_buffer *arg, u64 size, bool copy)
+{
+	void *writeable_val;
+
+	if (arg->flags & BPF_FUSE_IMMUTABLE)
+		return NULL;
+
+	/* Request fits in the current data, and no fresh allocation is owed. */
+	if (size <= arg->size &&
+	    (!(arg->flags & BPF_FUSE_MUST_ALLOCATE) ||
+	     (arg->flags & BPF_FUSE_ALLOCATED))) {
+		if (arg->flags & BPF_FUSE_VARIABLE_SIZE)
+			arg->size = size;
+		arg->flags |= BPF_FUSE_MODIFIED;
+		return arg->data;
+	}
+
+	/* Variable sized arrays must stay below max size. If the buffer must be fixed size,
+	 * don't change the allocated size. Verifier will enforce requested size for accesses
+	 */
+	if (arg->flags & BPF_FUSE_VARIABLE_SIZE) {
+		if (size > arg->max_size)
+			return NULL;
+	} else {
+		if (size > arg->size)
+			return NULL;
+		size = arg->size;
+	}
+
+	/* If our buffer is big enough, just adjust size.
+	 * NOTE(review): this path is also reachable for a BPF_FUSE_MUST_ALLOCATE
+	 * buffer that has not been allocated yet when alloc_size >= size --
+	 * confirm alloc_size is 0 for such buffers.
+	 */
+	if (size <= arg->alloc_size) {
+		if (!copy)
+			arg->size = size;
+		arg->flags |= BPF_FUSE_MODIFIED;
+		return arg->data;
+	}
+
+	writeable_val = kzalloc(size, GFP_KERNEL);
+	if (!writeable_val)
+		return NULL;
+
+	arg->alloc_size = size;
+	/* If we're copying the buffer, assume the same amount is used. If that isn't the case,
+	 * caller must change size. Otherwise, assume entirety of new buffer is used.
+	 */
+	if (copy) {
+		/* Never let the recorded size exceed the new allocation, or later
+		 * users of arg->size would run past the end of the buffer.
+		 */
+		if (arg->size > size)
+			arg->size = size;
+		memcpy(writeable_val, arg->data, arg->size);
+	} else {
+		arg->size = size;
+	}
+
+	if (arg->flags & BPF_FUSE_ALLOCATED)
+		kfree(arg->data);
+	arg->data = writeable_val;
+
+	arg->flags |= BPF_FUSE_ALLOCATED | BPF_FUSE_MODIFIED;
+
+	return arg->data;
+}
+EXPORT_SYMBOL(bpf_fuse_get_writeable);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+                  "Global kfuncs as their definitions will be in BTF");
+/*
+ * kfunc: make @buffer writable for @size bytes (see bpf_fuse_get_writeable();
+ * @copy is passed through to it) and initialise @dynptr__uninit as a local
+ * dynptr over the resulting data.
+ *
+ * If the buffer cannot be made writable, @buffer is left as it was and the
+ * dynptr is set to the null dynptr instead of being pointed at NULL data.
+ */
+void bpf_fuse_get_rw_dynptr(struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit, u64 size, bool copy)
+{
+	/* On success this is buffer->data, so no write-back is needed; writing
+	 * back a NULL result would lose the original data pointer.
+	 */
+	void *data = bpf_fuse_get_writeable(buffer, size, copy);
+
+	if (!data) {
+		bpf_dynptr_set_null(dynptr__uninit);
+		return;
+	}
+
+	bpf_dynptr_init(dynptr__uninit, data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
+}
+
+/*
+ * kfunc: initialise @dynptr__uninit as a local dynptr over buffer->data,
+ * buffer->size bytes long, and mark it read-only. @buffer is not modified.
+ */
+void bpf_fuse_get_ro_dynptr(const struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit)
+{
+	bpf_dynptr_init(dynptr__uninit, buffer->data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
+	bpf_dynptr_set_rdonly(dynptr__uninit);
+}
+
+/* kfunc: return the current size field of @buffer. */
+uint32_t bpf_fuse_return_len(struct fuse_buffer *buffer)
+{
+	return buffer->size;
+}
+__diag_pop();
+/* BTF id set of the kfuncs defined in this file; no flags are given for any of them. */
+BTF_SET8_START(fuse_kfunc_set)
+BTF_ID_FLAGS(func, bpf_fuse_get_rw_dynptr)
+BTF_ID_FLAGS(func, bpf_fuse_get_ro_dynptr)
+BTF_ID_FLAGS(func, bpf_fuse_return_len)
+BTF_SET8_END(fuse_kfunc_set)
+
+/* Wrapper handed to register_btf_kfunc_id_set() by bpf_fuse_kfuncs_init(). */
+static const struct btf_kfunc_id_set bpf_fuse_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set = &fuse_kfunc_set,
+};
+
+/*
+ * Register bpf_fuse_kfunc_set for BPF_PROG_TYPE_STRUCT_OPS programs.
+ * Returns whatever register_btf_kfunc_id_set() returns.
+ */
+static int __init bpf_fuse_kfuncs_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
+					 &bpf_fuse_kfunc_set);
+}
+
+/* Run the registration above as a late initcall. */
+late_initcall(bpf_fuse_kfuncs_init);
+
+/*
+ * Helper lookup for fuse_ops programs. No FUSE-specific helper ids are
+ * handled here, so every id is resolved by bpf_base_func_proto(); @prog is
+ * unused.
+ */
+static const struct bpf_func_proto *bpf_fuse_get_func_proto(enum bpf_func_id func_id,
+							      const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+/*
+ * Context-access check for fuse_ops programs: delegates unchanged to
+ * bpf_tracing_btf_ctx_access() and returns its verdict.
+ */
+static bool bpf_fuse_is_valid_access(int off, int size,
+				    enum bpf_access_type type,
+				    const struct bpf_prog *prog,
+				    struct bpf_insn_access_aux *info)
+{
+	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+/* BTF type of struct fuse_buffer; looked up once by bpf_fuse_init(). */
+const struct btf_type *fuse_buffer_struct_type;
+
+/*
+ * Verifier hook for accesses through BTF-typed pointers.
+ *
+ * Rejects (with -EACCES and a log message) any access whose register type is
+ * struct fuse_buffer; every other type gets 0. @off and @size are not
+ * inspected.
+ */
+static int bpf_fuse_btf_struct_access(struct bpf_verifier_log *log,
+					const struct bpf_reg_state *reg,
+					int off, int size)
+{
+	const struct btf_type *accessed = btf_type_by_id(reg->btf, reg->btf_id);
+
+	if (accessed != fuse_buffer_struct_type)
+		return 0;
+
+	bpf_log(log, "direct access to fuse_buffer is disallowed\n");
+	return -EACCES;
+}
+
+/* Verifier callbacks for fuse_ops programs; referenced from bpf_fuse_ops.verifier_ops. */
+static const struct bpf_verifier_ops bpf_fuse_verifier_ops = {
+	.get_func_proto		= bpf_fuse_get_func_proto,
+	.is_valid_access	= bpf_fuse_is_valid_access,
+	.btf_struct_access	= bpf_fuse_btf_struct_access,
+};
+
+/*
+ * struct_ops check_member hook. Currently accepts every member of
+ * struct fuse_ops: it always returns 0 and ignores its arguments. The
+ * commented-out lines sketch a per-member rejection that is not implemented.
+ */
+static int bpf_fuse_check_member(const struct btf_type *t,
+				   const struct btf_member *member,
+				   const struct bpf_prog *prog)
+{
+	//if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
+	//	return -ENOTSUPP;
+	return 0;
+}
+
+/*
+ * struct_ops init_member hook for struct fuse_ops.
+ *
+ * Only the @name member is handled here: it is copied from the user-supplied
+ * struct (@udata) into the kernel copy (@kdata) with bpf_obj_name_cpy(). No
+ * check for an already-used name is performed.
+ *
+ * Return: 1 after copying the name, -EINVAL if bpf_obj_name_cpy() reports a
+ * result <= 0, and 0 for every other member.
+ * NOTE(review): 1 vs 0 presumably tells the struct_ops core whether the
+ * member was consumed here -- confirm against bpf_struct_ops.c.
+ */
+static int bpf_fuse_init_member(const struct btf_type *t,
+				  const struct btf_member *member,
+				  void *kdata, const void *udata)
+{
+	const struct fuse_ops *user_ops = udata;
+	struct fuse_ops *kern_ops = kdata;
+	u32 byte_off = __btf_member_bit_offset(t, member) / 8;
+
+	if (byte_off != offsetof(struct fuse_ops, name))
+		return 0;
+
+	if (bpf_obj_name_cpy(kern_ops->name, user_ops->name,
+			     sizeof(kern_ops->name)) <= 0)
+		return -EINVAL;
+
+	return 1;
+}
+
+/*
+ * struct_ops init hook: look up struct fuse_buffer in @btf by name and cache
+ * its type in fuse_buffer_struct_type.
+ *
+ * Return: 0 on success, -EINVAL if the struct is not found in @btf.
+ */
+static int bpf_fuse_init(struct btf *btf)
+{
+	s32 fuse_buffer_id = btf_find_by_name_kind(btf, "fuse_buffer", BTF_KIND_STRUCT);
+
+	if (fuse_buffer_id < 0)
+		return -EINVAL;
+
+	fuse_buffer_struct_type = btf_type_by_id(btf, fuse_buffer_id);
+	return 0;
+}
+
+/* Attach callbacks supplied through register_fuse_bpf(); NULL while none are registered. */
+static struct bpf_fuse_ops_attach *fuse_reg = NULL;
+
+/*
+ * struct_ops reg hook: forward @kdata to the registered fuse_register_bpf()
+ * callback and return its result.
+ *
+ * Return: -EOPNOTSUPP (after logging a warning) when nothing has been
+ * registered via register_fuse_bpf().
+ */
+static int bpf_fuse_reg(void *kdata)
+{
+	if (fuse_reg)
+		return fuse_reg->fuse_register_bpf(kdata);
+	/* Terminate the message with a newline so it is emitted as a complete line. */
+	pr_warn("Cannot register fuse_ops, FUSE not found\n");
+	return -EOPNOTSUPP;
+}
+
+/*
+ * struct_ops unreg hook: forward @kdata to the registered
+ * fuse_unregister_bpf() callback. Does nothing when no callbacks are
+ * registered.
+ */
+static void bpf_fuse_unreg(void *kdata)
+{
+	/* Plain call: a void function must not return an expression. */
+	if (fuse_reg)
+		fuse_reg->fuse_unregister_bpf(kdata);
+}
+
+/**
+ * register_fuse_bpf - install the attach callbacks used by fuse_ops
+ * @reg_ops: callbacks to install
+ *
+ * Registering the same @reg_ops again is a no-op. A different set is refused
+ * while one is installed, mirroring unregister_fuse_bpf(), which refuses to
+ * remove callbacks that are not the installed ones.
+ *
+ * Return: 0 on success, -EBUSY if other callbacks are already installed.
+ */
+int register_fuse_bpf(struct bpf_fuse_ops_attach *reg_ops)
+{
+	if (fuse_reg && fuse_reg != reg_ops) {
+		pr_warn("Refusing to replace already registered FUSE\n");
+		return -EBUSY;
+	}
+
+	fuse_reg = reg_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_fuse_bpf);
+
+/**
+ * unregister_fuse_bpf - remove the attach callbacks used by fuse_ops
+ * @reg_ops: callbacks previously passed to register_fuse_bpf()
+ *
+ * Clears the installed callbacks only if @reg_ops is the installed set;
+ * otherwise logs a warning and leaves the current registration in place.
+ */
+void unregister_fuse_bpf(struct bpf_fuse_ops_attach *reg_ops)
+{
+	if (reg_ops == fuse_reg)
+		fuse_reg = NULL;
+	else
+		/* Terminate the message with a newline so it is emitted as a complete line. */
+		pr_warn("Refusing to unregister unregistered FUSE\n");
+}
+EXPORT_SYMBOL_GPL(unregister_fuse_bpf);
+
+/* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_fuse_ops;
+
+/*
+ * struct_ops descriptor for the "fuse_ops" type: ties together the verifier
+ * callbacks, the reg/unreg hooks that forward to the registered FUSE attach
+ * callbacks, and the member/init hooks defined in this file.
+ */
+struct bpf_struct_ops bpf_fuse_ops = {
+	.verifier_ops = &bpf_fuse_verifier_ops,
+	.reg = bpf_fuse_reg,
+	.unreg = bpf_fuse_unreg,
+	.check_member = bpf_fuse_check_member,
+	.init_member = bpf_fuse_init_member,
+	.init = bpf_fuse_init,
+	.name = "fuse_ops",
+};
+
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 5678a9ddf817..fabb2c1a9482 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -5,6 +5,10 @@ 
 #ifdef CONFIG_NET
 BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
 #endif
+#ifdef CONFIG_FUSE_BPF
+#include <linux/bpf_fuse.h>
+BPF_STRUCT_OPS_TYPE(fuse_ops)
+#endif
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 027f9f8a3551..c34fd9e70039 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -25,6 +25,7 @@ 
 #include <linux/bsearch.h>
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
+#include <linux/bpf_fuse.h>
 #include <net/sock.h>
 #include "../tools/lib/bpf/relo_core.h"
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fd959824469d..b3bda15283c0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9597,6 +9597,8 @@  enum special_kfunc_type {
 	KF_bpf_dynptr_from_xdp,
 	KF_bpf_dynptr_slice,
 	KF_bpf_dynptr_slice_rdwr,
+	KF_bpf_fuse_get_rw_dynptr,
+	KF_bpf_fuse_get_ro_dynptr,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -9616,6 +9618,8 @@  BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
+BTF_ID(func, bpf_fuse_get_rw_dynptr)
+BTF_ID(func, bpf_fuse_get_ro_dynptr)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -9637,6 +9641,8 @@  BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
+BTF_ID(func, bpf_fuse_get_rw_dynptr)
+BTF_ID(func, bpf_fuse_get_ro_dynptr)
 
 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -10349,6 +10355,9 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
 			else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
+			else if (meta->func_id == special_kfunc_list[KF_bpf_fuse_get_rw_dynptr] ||
+					meta->func_id == special_kfunc_list[KF_bpf_fuse_get_ro_dynptr])
+				dynptr_arg_type |= DYNPTR_TYPE_LOCAL;
 
 			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
 			if (ret < 0)