
[v5,bpf-next,4/5] bpf: fs/xattr: Add BPF kfuncs to set and remove xattrs

Message ID: 20241218044711.1723221-5-song@kernel.org
State: Handled Elsewhere
Series: Enable writing xattr from BPF programs

Commit Message

Song Liu Dec. 18, 2024, 4:47 a.m. UTC
Add the following kfuncs to set and remove xattrs from BPF programs:

  bpf_set_dentry_xattr
  bpf_remove_dentry_xattr
  bpf_set_dentry_xattr_locked
  bpf_remove_dentry_xattr_locked

The _locked versions of these kfuncs are called from hooks where
dentry->d_inode is already locked. Instead of requiring the user
to know which version of the kfuncs to use, the verifier will pick
the proper kfunc based on the calling hook.
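
For illustration, a minimal sketch of a caller (the hook choice, program
name, and xattr name below are hypothetical; the kfunc prototype matches
this patch). inode_unlink already holds d_inode's lock, so the verifier
would remap this call to bpf_set_dentry_xattr_locked:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
                                const struct bpf_dynptr *value_p,
                                int flags) __ksym;

char tag[] = "1";       /* xattr value, kept in writable global data */

SEC("lsm.s/inode_unlink")
int BPF_PROG(tag_unlink, struct inode *dir, struct dentry *dentry)
{
        struct bpf_dynptr value;

        bpf_dynptr_from_mem(tag, sizeof(tag), 0, &value);
        /* only names with the "security.bpf." prefix are allowed */
        bpf_set_dentry_xattr(dentry, "security.bpf.unlinked", &value, 0);
        return 0;
}

char _license[] SEC("license") = "GPL";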

Signed-off-by: Song Liu <song@kernel.org>
---
 fs/bpf_fs_kfuncs.c      | 195 ++++++++++++++++++++++++++++++++++++++++
 include/linux/bpf_lsm.h |   8 ++
 kernel/bpf/verifier.c   |  55 +++++++++++-
 3 files changed, 256 insertions(+), 2 deletions(-)

Comments

Alexei Starovoitov Dec. 18, 2024, 9:20 p.m. UTC | #1
On Tue, Dec 17, 2024 at 8:48 PM Song Liu <song@kernel.org> wrote:
>
>
>  BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> @@ -170,6 +330,10 @@ BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
>  BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
>  BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>  BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>  BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)

The _locked() versions shouldn't be exposed to bpf prog.
Don't add them to the above set.

Also we need to somehow exclude them from being dumped into vmlinux.h

>  static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
> @@ -186,6 +350,37 @@ static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
>         .filter = bpf_fs_kfuncs_filter,
>  };
>
> +/* bpf_[set|remove]_dentry_xattr.* hooks have KF_TRUSTED_ARGS and
> + * KF_SLEEPABLE, so they are only available to sleepable hooks with
> + * dentry arguments.
> + *
> + * Setting and removing xattr requires exclusive lock on dentry->d_inode.
> + * Some hooks already locked d_inode, while some hooks have not locked
> + * d_inode. Therefore, we need different kfuncs for different hooks.
> + * Specifically, hooks in the following list (d_inode_locked_hooks)
> + * should call bpf_[set|remove]_dentry_xattr_locked; while other hooks
> + * should call bpf_[set|remove]_dentry_xattr.
> + */
> +BTF_SET_START(d_inode_locked_hooks)
> +BTF_ID(func, bpf_lsm_inode_post_removexattr)
> +BTF_ID(func, bpf_lsm_inode_post_setattr)
> +BTF_ID(func, bpf_lsm_inode_post_setxattr)
> +BTF_ID(func, bpf_lsm_inode_removexattr)
> +BTF_ID(func, bpf_lsm_inode_rmdir)
> +BTF_ID(func, bpf_lsm_inode_setattr)
> +BTF_ID(func, bpf_lsm_inode_setxattr)
> +BTF_ID(func, bpf_lsm_inode_unlink)
> +#ifdef CONFIG_SECURITY_PATH
> +BTF_ID(func, bpf_lsm_path_unlink)
> +BTF_ID(func, bpf_lsm_path_rmdir)
> +#endif /* CONFIG_SECURITY_PATH */
> +BTF_SET_END(d_inode_locked_hooks)
> +
> +bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
> +{
> +       return btf_id_set_contains(&d_inode_locked_hooks, prog->aux->attach_btf_id);
> +}
> +
>  static int __init bpf_fs_kfuncs_init(void)
>  {
>         return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index aefcd6564251..5147b10e16a2 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -48,6 +48,9 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func)
>
>  int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
>                              struct bpf_retval_range *range);
> +
> +bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog);
> +
>  #else /* !CONFIG_BPF_LSM */
>
>  static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
> @@ -86,6 +89,11 @@ static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
>  {
>         return -EOPNOTSUPP;
>  }
> +
> +static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
> +{
> +       return false;
> +}
>  #endif /* CONFIG_BPF_LSM */
>
>  #endif /* _LINUX_BPF_LSM_H */
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f27274e933e5..f0d240d46e54 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -206,6 +206,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env,
>  static void specialize_kfunc(struct bpf_verifier_env *env,
>                              u32 func_id, u16 offset, unsigned long *addr);
>  static bool is_trusted_reg(const struct bpf_reg_state *reg);
> +static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn);
>
>  static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
>  {
> @@ -3224,10 +3225,12 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
>                         return -EPERM;
>                 }
>
> -               if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
> +               if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) {
>                         ret = add_subprog(env, i + insn->imm + 1);
> -               else
> +               } else {
> +                       remap_kfunc_locked_func_id(env, insn);
>                         ret = add_kfunc_call(env, insn->imm, insn->off);
> +               }
>
>                 if (ret < 0)
>                         return ret;
> @@ -11690,6 +11693,10 @@ enum special_kfunc_type {
>         KF_bpf_get_kmem_cache,
>         KF_bpf_local_irq_save,
>         KF_bpf_local_irq_restore,
> +       KF_bpf_set_dentry_xattr,
> +       KF_bpf_remove_dentry_xattr,
> +       KF_bpf_set_dentry_xattr_locked,
> +       KF_bpf_remove_dentry_xattr_locked,
>  };
>
>  BTF_SET_START(special_kfunc_set)
> @@ -11719,6 +11726,12 @@ BTF_ID(func, bpf_wq_set_callback_impl)
>  #ifdef CONFIG_CGROUPS
>  BTF_ID(func, bpf_iter_css_task_new)
>  #endif
> +#ifdef CONFIG_BPF_LSM
> +BTF_ID(func, bpf_set_dentry_xattr)
> +BTF_ID(func, bpf_remove_dentry_xattr)
> +BTF_ID(func, bpf_set_dentry_xattr_locked)
> +BTF_ID(func, bpf_remove_dentry_xattr_locked)
> +#endif
>  BTF_SET_END(special_kfunc_set)

Do they need to be a part of special_kfunc_set?
Where is the code that uses that?

>
>  BTF_ID_LIST(special_kfunc_list)
> @@ -11762,6 +11775,44 @@ BTF_ID_UNUSED
>  BTF_ID(func, bpf_get_kmem_cache)
>  BTF_ID(func, bpf_local_irq_save)
>  BTF_ID(func, bpf_local_irq_restore)
> +#ifdef CONFIG_BPF_LSM
> +BTF_ID(func, bpf_set_dentry_xattr)
> +BTF_ID(func, bpf_remove_dentry_xattr)
> +BTF_ID(func, bpf_set_dentry_xattr_locked)
> +BTF_ID(func, bpf_remove_dentry_xattr_locked)
> +#else
> +BTF_ID_UNUSED
> +BTF_ID_UNUSED
> +BTF_ID_UNUSED
> +BTF_ID_UNUSED
> +#endif
> +
> +/* Sometimes, we need slightly different verions of a kfunc for different

versions

> + * contexts/hooks, for example, bpf_set_dentry_xattr vs.
> + * bpf_set_dentry_xattr_locked. The former kfunc need to lock the inode
> + * rwsem, while the latter is called with the inode rwsem held (by the
> + * caller).
> + *
> + * To avoid burden on the users, we allow either version of the kfunc in
> + * either context. Then the verifier will remap the kfunc to the proper
> + * version based the context.
> + */
> +static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn)
> +{
> +       u32 func_id = insn->imm;
> +
> +       if (bpf_lsm_has_d_inode_locked(env->prog)) {
> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr])
> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr_locked];
> +               else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr])
> +                       insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr_locked];
> +       } else {
> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr_locked])
> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr];

This part is not necessary.
_locked() shouldn't be exposed and it should be an error
if bpf prog attempts to use invalid kfunc.

> +               else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr_locked])
> +                       insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr];
> +       }
> +}
>
>  static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>  {
> --
> 2.43.5
>
Song Liu Dec. 18, 2024, 9:47 p.m. UTC | #2
Hi Alexei, 

Thanks for the review!

> On Dec 18, 2024, at 1:20 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> On Tue, Dec 17, 2024 at 8:48 PM Song Liu <song@kernel.org> wrote:
>> 
>> 
>> BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
>> @@ -170,6 +330,10 @@ BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
>> BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
>> BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>> BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
> 
> The _locked() versions shouldn't be exposed to bpf prog.
> Don't add them to the above set.
> 
> Also we need to somehow exclude them from being dumped into vmlinux.h
> 
>> static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
>> @@ -186,6 +350,37 @@ static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
>>        .filter = bpf_fs_kfuncs_filter,
>> };

[...]

>> + */
>> +static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn)
>> +{
>> +       u32 func_id = insn->imm;
>> +
>> +       if (bpf_lsm_has_d_inode_locked(env->prog)) {
>> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr])
>> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr_locked];
>> +               else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr])
>> +                       insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr_locked];
>> +       } else {
>> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr_locked])
>> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr];
> 
> This part is not necessary.
> _locked() shouldn't be exposed and it should be an error
> if bpf prog attempts to use invalid kfunc.

I was implementing this in a different way than the solution you and Kumar
suggested. Instead of updating this in add_kfunc_call, check_kfunc_call,
and fixup_kfunc_call, remap_kfunc_locked_func_id happens before
add_kfunc_call. Then, for the rest of the process, the verifier handles
the _locked and non-_locked versions as two different kfuncs. This is
why we need the _locked versions in bpf_fs_kfunc_set_ids. I personally
think this approach is a lot cleaner.

I think the missing piece is to exclude the _locked versions from
vmlinux.h. Maybe we can achieve this by adding a different DECL_TAG
to these kfuncs?

Does this make sense?

Thanks,
Song
Song Liu Dec. 18, 2024, 10:10 p.m. UTC | #3
> On Dec 18, 2024, at 1:47 PM, Song Liu <songliubraving@meta.com> wrote:
> 
> Hi Alexei, 
> 
> Thanks for the review!
> 
>> On Dec 18, 2024, at 1:20 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>> 
>> On Tue, Dec 17, 2024 at 8:48 PM Song Liu <song@kernel.org> wrote:
>>> 
>>> 
>>> BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
>>> @@ -170,6 +330,10 @@ BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
>>> BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
>>> BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
>>> BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
>> 
>> The _locked() versions shouldn't be exposed to bpf prog.
>> Don't add them to the above set.
>> 
>> Also we need to somehow exclude them from being dumped into vmlinux.h
>> 
>>> static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
>>> @@ -186,6 +350,37 @@ static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
>>>       .filter = bpf_fs_kfuncs_filter,
>>> };
> 
> [...]
> 
>>> + */
>>> +static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn)
>>> +{
>>> +       u32 func_id = insn->imm;
>>> +
>>> +       if (bpf_lsm_has_d_inode_locked(env->prog)) {
>>> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr])
>>> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr_locked];
>>> +               else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr])
>>> +                       insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr_locked];
>>> +       } else {
>>> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr_locked])
>>> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr];
>> 
>> This part is not necessary.
>> _locked() shouldn't be exposed and it should be an error
>> if bpf prog attempts to use invalid kfunc.
> 
> I was implementing this in a different way than the solution you and Kumar
> suggested. Instead of updating this in add_kfunc_call, check_kfunc_call,
> and fixup_kfunc_call, remap_kfunc_locked_func_id happens before
> add_kfunc_call. Then, for the rest of the process, the verifier handles
> the _locked and non-_locked versions as two different kfuncs. This is
> why we need the _locked versions in bpf_fs_kfunc_set_ids. I personally
> think this approach is a lot cleaner.
> 
> I think the missing piece is to exclude the _locked versions from
> vmlinux.h. Maybe we can achieve this by adding a different DECL_TAG
> to these kfuncs?

Having looked into the code, I think it is doable:

1. Extend struct btf_kfunc_id_set with "struct btf_id_set8 *shadow_set",
   or a different name;
2. Add the _locked kfuncs to shadow_set; these kfuncs will not have
   BTF_SET8_KFUNCS set, so pahole will not generate a DECL_TAG of
   "bpf_kfunc" for them.
3. __btf_kfunc_id_set_contains() will need to look up ids from shadow_set,
   and the filter function needs to handle shadow_set.

Does this sound sane? 

Thanks,
Song
Alexei Starovoitov Dec. 19, 2024, 12:17 a.m. UTC | #4
On Wed, Dec 18, 2024 at 1:47 PM Song Liu <songliubraving@meta.com> wrote:
>
> Hi Alexei,
>
> Thanks for the review!
>
> > On Dec 18, 2024, at 1:20 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Dec 17, 2024 at 8:48 PM Song Liu <song@kernel.org> wrote:
> >>
> >>
> >> BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> >> @@ -170,6 +330,10 @@ BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
> >> BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
> >> BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> +BTF_ID_FLAGS(func, bpf_set_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> +BTF_ID_FLAGS(func, bpf_remove_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
> >> BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
> >
> > The _locked() versions shouldn't be exposed to bpf prog.
> > Don't add them to the above set.
> >
> > Also we need to somehow exclude them from being dumped into vmlinux.h
> >
> >> static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
> >> @@ -186,6 +350,37 @@ static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
> >>        .filter = bpf_fs_kfuncs_filter,
> >> };
>
> [...]
>
> >> + */
> >> +static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn)
> >> +{
> >> +       u32 func_id = insn->imm;
> >> +
> >> +       if (bpf_lsm_has_d_inode_locked(env->prog)) {
> >> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr])
> >> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr_locked];
> >> +               else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr])
> >> +                       insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr_locked];
> >> +       } else {
> >> +               if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr_locked])
> >> +                       insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr];
> >
> > This part is not necessary.
> > _locked() shouldn't be exposed and it should be an error
> > if bpf prog attempts to use invalid kfunc.
>
> I was implementing this in a different way than the solution you and Kumar
> suggested. Instead of updating this in add_kfunc_call, check_kfunc_call,
> and fixup_kfunc_call, remap_kfunc_locked_func_id happens before
> add_kfunc_call. Then, for the rest of the process, the verifier handles
> the _locked and non-_locked versions as two different kfuncs. This is
> why we need the _locked versions in bpf_fs_kfunc_set_ids. I personally
> think this approach is a lot cleaner.

I see. Blind rewrite in add_kfunc_call() looks simpler,
but allowing progs to call the _locked() version directly is not clean.

See specialize_kfunc() as an existing approach that does polymorphism.

_locked() doesn't need to be __bpf_kfunc annotated.
It can be just like bpf_dynptr_from_skb_rdonly.

There will be no issue with vmlinux.h as well.
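
For reference, the pattern mentioned above keeps the program-facing kfunc
and its BTF ID intact, and only swaps the call target address at fixup
time. A trimmed sketch of the skb-dynptr case (simplified from
kernel/bpf/verifier.c; error handling and the seen_direct_write
bookkeeping are elided):

static void specialize_kfunc(struct bpf_verifier_env *env,
                             u32 func_id, u16 offset, unsigned long *addr)
{
        if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
                /* progs without pkt-write access get the rdonly variant;
                 * bpf_dynptr_from_skb_rdonly is not __bpf_kfunc annotated,
                 * so it never shows up in vmlinux.h
                 */
                if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE))
                        *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
        }
}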
Song Liu Dec. 19, 2024, 6:59 a.m. UTC | #5
> On Dec 18, 2024, at 4:17 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

[...]

>>> This part is not necessary.
>>> _locked() shouldn't be exposed and it should be an error
>>> if bpf prog attempts to use invalid kfunc.
>> 
>> I was implementing this in a different way than the solution you and Kumar
>> suggested. Instead of updating this in add_kfunc_call, check_kfunc_call,
>> and fixup_kfunc_call, remap_kfunc_locked_func_id happens before
>> add_kfunc_call. Then, for the rest of the process, the verifier handles
>> the _locked and non-_locked versions as two different kfuncs. This is
>> why we need the _locked versions in bpf_fs_kfunc_set_ids. I personally
>> think this approach is a lot cleaner.
> 
> I see. Blind rewrite in add_kfunc_call() looks simpler,
> but allowing progs to call the _locked() version directly is not clean.

Agreed. 

> 
> See specialize_kfunc() as an existing approach that does polymorphism.
> 
> _locked() doesn't need to be __bpf_kfunc annotated.
> It can be just like bpf_dynptr_from_skb_rdonly.

I am thinking about a more modular approach. Instead of pushing the
polymorphism logic into verifier.c, we can have each btf_kfunc_id_set
handle the remapping of its kfuncs. Specifically, we can extend
btf_kfunc_id_set as:

typedef u32 (*btf_kfunc_remap_t)(const struct bpf_prog *prog, u32 kfunc_id);

struct btf_kfunc_id_set {
        struct module *owner;
        struct btf_id_set8 *set;
        /* hidden_set contains kfuncs that are not marked as kfunc in
         * vmlinux.h. These kfuncs are usually a variation of a kfunc
         * in @set.
         */
        struct btf_id_set8 *hidden_set;
        btf_kfunc_filter_t filter;
        /* @remap method matches kfuncs in @set to the proper version in
         * @hidden_set.
         */
        btf_kfunc_remap_t remap;
};

In this case, the non-_locked kfuncs will be added to @set, while the
_locked kfuncs will be added to @hidden_set. @hidden_set will not be
exposed in vmlinux.h. The new remap method is then used to map the
non-_locked kfuncs to their _locked counterparts in inode-locked
contexts.

We can also move bpf_dynptr_from_skb_rdonly to this model, and 
simplify specialize_kfunc(). 
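
A hypothetical sketch of such a remap callback for the fs kfunc set
(the BTF_ID_LIST and function below are illustrations of this proposal,
not existing kernel API):

BTF_ID_LIST(fs_xattr_kfunc_ids)
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_set_dentry_xattr_locked)
BTF_ID(func, bpf_remove_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr_locked)

static u32 bpf_fs_kfuncs_remap(const struct bpf_prog *prog, u32 kfunc_id)
{
        /* only remap for hooks that already hold dentry->d_inode's rwsem */
        if (!bpf_lsm_has_d_inode_locked(prog))
                return kfunc_id;

        if (kfunc_id == fs_xattr_kfunc_ids[0])
                return fs_xattr_kfunc_ids[1];   /* set -> set_locked */
        if (kfunc_id == fs_xattr_kfunc_ids[2])
                return fs_xattr_kfunc_ids[3];   /* remove -> remove_locked */

        return kfunc_id;
}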

I will send a patch for this version for review.

Thanks,
Song

Patch

diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
index 8a65184c8c2c..b3cc6330ab69 100644
--- a/fs/bpf_fs_kfuncs.c
+++ b/fs/bpf_fs_kfuncs.c
@@ -2,10 +2,12 @@ 
 /* Copyright (c) 2024 Google LLC. */
 
 #include <linux/bpf.h>
+#include <linux/bpf_lsm.h>
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/fsnotify.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/xattr.h>
@@ -161,6 +163,164 @@  __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
 	return bpf_get_dentry_xattr(dentry, name__str, value_p);
 }
 
+static int bpf_xattr_write_permission(const char *name, struct inode *inode)
+{
+	if (WARN_ON(!inode))
+		return -EINVAL;
+
+	/* Only allow setting and removing security.bpf. xattrs */
+	if (!match_security_bpf_prefix(name))
+		return -EPERM;
+
+	return inode_permission(&nop_mnt_idmap, inode, MAY_WRITE);
+}
+
+static int __bpf_set_dentry_xattr(struct dentry *dentry, const char *name,
+				  const struct bpf_dynptr *value_p, int flags, bool lock_inode)
+{
+	struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
+	struct inode *inode = d_inode(dentry);
+	const void *value;
+	u32 value_len;
+	int ret;
+
+	value_len = __bpf_dynptr_size(value_ptr);
+	value = __bpf_dynptr_data(value_ptr, value_len);
+	if (!value)
+		return -EINVAL;
+
+	if (lock_inode)
+		inode_lock(inode);
+
+	ret = bpf_xattr_write_permission(name, inode);
+	if (ret)
+		goto out;
+
+	ret = __vfs_setxattr(&nop_mnt_idmap, dentry, inode, name,
+			     value, value_len, flags);
+	if (!ret) {
+		fsnotify_xattr(dentry);
+
+		/* This xattr is set by BPF LSM, so we do not call
+		 * security_inode_post_setxattr. This is the same as
+		 * security_inode_setsecurity().
+		 */
+	}
+out:
+	if (lock_inode)
+		inode_unlock(inode);
+	return ret;
+}
+
+/**
+ * bpf_set_dentry_xattr - set a xattr of a dentry
+ * @dentry: dentry to get xattr from
+ * @name__str: name of the xattr
+ * @value_p: xattr value
+ * @flags: flags to pass into filesystem operations
+ *
+ * Set xattr *name__str* of *dentry* to the value in *value_ptr*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller has not locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+				     const struct bpf_dynptr *value_p, int flags)
+{
+	return __bpf_set_dentry_xattr(dentry, name__str, value_p, flags, true);
+}
+
+/**
+ * bpf_set_dentry_xattr_locked - set a xattr of a dentry
+ * @dentry: dentry to get xattr from
+ * @name__str: name of the xattr
+ * @value_p: xattr value
+ * @flags: flags to pass into filesystem operations
+ *
+ * Set xattr *name__str* of *dentry* to the value in *value_ptr*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller already locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
+					    const struct bpf_dynptr *value_p, int flags)
+{
+	return __bpf_set_dentry_xattr(dentry, name__str, value_p, flags, false);
+}
+
+static int __bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str,
+				     bool lock_inode)
+{
+	struct inode *inode = d_inode(dentry);
+	int ret;
+
+	if (lock_inode)
+		inode_lock(inode);
+
+	ret = bpf_xattr_write_permission(name__str, inode);
+	if (ret)
+		goto out;
+
+	ret = __vfs_removexattr(&nop_mnt_idmap, dentry, name__str);
+	if (!ret) {
+		fsnotify_xattr(dentry);
+
+		/* This xattr is removed by BPF LSM, so we do not call
+		 * security_inode_post_removexattr.
+		 */
+	}
+out:
+	if (lock_inode)
+		inode_unlock(inode);
+	return ret;
+}
+
+/**
+ * bpf_remove_dentry_xattr - remove a xattr of a dentry
+ * @dentry: dentry to get xattr from
+ * @name__str: name of the xattr
+ *
+ * Rmove xattr *name__str* of *dentry*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller has not locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str)
+{
+	return __bpf_remove_dentry_xattr(dentry, name__str, true);
+}
+
+/**
+ * bpf_remove_dentry_xattr_locked - remove a xattr of a dentry
+ * @dentry: dentry to get xattr from
+ * @name__str: name of the xattr
+ *
+ * Rmove xattr *name__str* of *dentry*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller already locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str)
+{
+	return __bpf_remove_dentry_xattr(dentry, name__str, false);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
@@ -170,6 +330,10 @@  BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_set_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_remove_dentry_xattr_locked, KF_SLEEPABLE | KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
 
 static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
@@ -186,6 +350,37 @@  static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
 	.filter = bpf_fs_kfuncs_filter,
 };
 
+/* bpf_[set|remove]_dentry_xattr.* hooks have KF_TRUSTED_ARGS and
+ * KF_SLEEPABLE, so they are only available to sleepable hooks with
+ * dentry arguments.
+ *
+ * Setting and removing xattr requires exclusive lock on dentry->d_inode.
+ * Some hooks already locked d_inode, while some hooks have not locked
+ * d_inode. Therefore, we need different kfuncs for different hooks.
+ * Specifically, hooks in the following list (d_inode_locked_hooks)
+ * should call bpf_[set|remove]_dentry_xattr_locked; while other hooks
+ * should call bpf_[set|remove]_dentry_xattr.
+ */
+BTF_SET_START(d_inode_locked_hooks)
+BTF_ID(func, bpf_lsm_inode_post_removexattr)
+BTF_ID(func, bpf_lsm_inode_post_setattr)
+BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_removexattr)
+BTF_ID(func, bpf_lsm_inode_rmdir)
+BTF_ID(func, bpf_lsm_inode_setattr)
+BTF_ID(func, bpf_lsm_inode_setxattr)
+BTF_ID(func, bpf_lsm_inode_unlink)
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, bpf_lsm_path_unlink)
+BTF_ID(func, bpf_lsm_path_rmdir)
+#endif /* CONFIG_SECURITY_PATH */
+BTF_SET_END(d_inode_locked_hooks)
+
+bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
+{
+	return btf_id_set_contains(&d_inode_locked_hooks, prog->aux->attach_btf_id);
+}
+
 static int __init bpf_fs_kfuncs_init(void)
 {
 	return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index aefcd6564251..5147b10e16a2 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -48,6 +48,9 @@  void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func)
 
 int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
 			     struct bpf_retval_range *range);
+
+bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -86,6 +89,11 @@  static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
+{
+	return false;
+}
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f27274e933e5..f0d240d46e54 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -206,6 +206,7 @@  static int ref_set_non_owning(struct bpf_verifier_env *env,
 static void specialize_kfunc(struct bpf_verifier_env *env,
 			     u32 func_id, u16 offset, unsigned long *addr);
 static bool is_trusted_reg(const struct bpf_reg_state *reg);
+static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn);
 
 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 {
@@ -3224,10 +3225,12 @@  static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return -EPERM;
 		}
 
-		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
+		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) {
 			ret = add_subprog(env, i + insn->imm + 1);
-		else
+		} else {
+			remap_kfunc_locked_func_id(env, insn);
 			ret = add_kfunc_call(env, insn->imm, insn->off);
+		}
 
 		if (ret < 0)
 			return ret;
@@ -11690,6 +11693,10 @@  enum special_kfunc_type {
 	KF_bpf_get_kmem_cache,
 	KF_bpf_local_irq_save,
 	KF_bpf_local_irq_restore,
+	KF_bpf_set_dentry_xattr,
+	KF_bpf_remove_dentry_xattr,
+	KF_bpf_set_dentry_xattr_locked,
+	KF_bpf_remove_dentry_xattr_locked,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11719,6 +11726,12 @@  BTF_ID(func, bpf_wq_set_callback_impl)
 #ifdef CONFIG_CGROUPS
 BTF_ID(func, bpf_iter_css_task_new)
 #endif
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+BTF_ID(func, bpf_set_dentry_xattr_locked)
+BTF_ID(func, bpf_remove_dentry_xattr_locked)
+#endif
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -11762,6 +11775,44 @@  BTF_ID_UNUSED
 BTF_ID(func, bpf_get_kmem_cache)
 BTF_ID(func, bpf_local_irq_save)
 BTF_ID(func, bpf_local_irq_restore)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+BTF_ID(func, bpf_set_dentry_xattr_locked)
+BTF_ID(func, bpf_remove_dentry_xattr_locked)
+#else
+BTF_ID_UNUSED
+BTF_ID_UNUSED
+BTF_ID_UNUSED
+BTF_ID_UNUSED
+#endif
+
+/* Sometimes, we need slightly different verions of a kfunc for different
+ * contexts/hooks, for example, bpf_set_dentry_xattr vs.
+ * bpf_set_dentry_xattr_locked. The former kfunc need to lock the inode
+ * rwsem, while the latter is called with the inode rwsem held (by the
+ * caller).
+ *
+ * To avoid burden on the users, we allow either version of the kfunc in
+ * either context. Then the verifier will remap the kfunc to the proper
+ * version based the context.
+ */
+static void remap_kfunc_locked_func_id(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+	u32 func_id = insn->imm;
+
+	if (bpf_lsm_has_d_inode_locked(env->prog)) {
+		if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr])
+			insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr_locked];
+		else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr])
+			insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr_locked];
+	} else {
+		if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr_locked])
+			insn->imm =  special_kfunc_list[KF_bpf_set_dentry_xattr];
+		else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr_locked])
+			insn->imm = special_kfunc_list[KF_bpf_remove_dentry_xattr];
+	}
+}
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {