diff mbox series

[v1] fs: Add kfuncs to handle idmapped mounts

Message ID c35fbb4cb0a3a9b4653f9a032698469d94ca6e9c.1688123230.git.legion@kernel.org (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series [v1] fs: Add kfuncs to handle idmapped mounts | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 pending Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc

Commit Message

Alexey Gladkov June 30, 2023, 11:08 a.m. UTC
Since the introduction of idmapped mounts, file handling has become
somewhat more complicated. If the inode has been found through an
idmapped mount the idmap of the vfsmount must be used to get proper
i_uid / i_gid. This is important, for example, to correctly take into
account idmapped files when caching, LSM or for an audit.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
---
 fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

Comments

Hou Tao July 4, 2023, 11:42 a.m. UTC | #1
Hi,

On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> Since the introduction of idmapped mounts, file handling has become
> somewhat more complicated. If the inode has been found through an
> idmapped mount the idmap of the vfsmount must be used to get proper
> i_uid / i_gid. This is important, for example, to correctly take into
> account idmapped files when caching, LSM or for an audit.

Could you please add a bpf selftest for these newly added kfuncs ?
>
> Signed-off-by: Alexey Gladkov <legion@kernel.org>
> ---
>  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
>
> diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> index 4905665c47d0..ba98ce26b883 100644
> --- a/fs/mnt_idmapping.c
> +++ b/fs/mnt_idmapping.c
> @@ -6,6 +6,7 @@
>  #include <linux/mnt_idmapping.h>
>  #include <linux/slab.h>
>  #include <linux/user_namespace.h>
> +#include <linux/bpf.h>
>  
>  #include "internal.h"
>  
> @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
>  		kfree(idmap);
>  	}
>  }
> +
> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> +		  "Global functions as their definitions will be in vmlinux BTF");
> +
> +/**
> + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> + * @mnt: the mount to check
> + *
> + * Return: true if mount is mapped, false if not.
> + */
> +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> +{
> +	return is_idmapped_mnt(mnt);
> +}
> +
> +/**
> + * bpf_file_mnt_idmap - get file idmapping
> + * @file: the file from which to get mapping
> + *
> + * Return: The idmap for the @file.
> + */
> +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> +{
> +	return file_mnt_idmap(file);
> +}

A dummy question here: the implementation of file_mnt_idmap() is
file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
there any reason why we could not do such dereference directly in bpf
program ?
> +
> +/**
> + * bpf_inode_into_vfs_ids - map an inode's i_uid and i_gid down according to an idmapping
> + * @idmap: idmap of the mount the inode was found from
> + * @inode: inode to map
> + *
> + * The inode's i_uid and i_gid mapped down according to @idmap. If the inode's
> + * i_uid or i_gid has no mapping INVALID_VFSUID or INVALID_VFSGID is returned in
> + * the corresponding position.
> + *
> + * Return: A 64-bit integer containing the current GID and UID, and created as
> + * such: *gid* **<< 32 \|** *uid*.
> + */
> +__bpf_kfunc uint64_t bpf_inode_into_vfs_ids(struct mnt_idmap *idmap,
> +		const struct inode *inode)
> +{
> +	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
> +	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
> +
> +	return (u64) __vfsgid_val(vfsgid) << 32 |
> +		     __vfsuid_val(vfsuid);
> +}
> +
> +__diag_pop();
> +
> +BTF_SET8_START(idmap_btf_ids)
> +BTF_ID_FLAGS(func, bpf_is_idmapped_mnt)
> +BTF_ID_FLAGS(func, bpf_file_mnt_idmap)
> +BTF_ID_FLAGS(func, bpf_inode_into_vfs_ids)
> +BTF_SET8_END(idmap_btf_ids)
> +
> +static const struct btf_kfunc_id_set idmap_kfunc_set = {
> +	.owner = THIS_MODULE,
> +	.set   = &idmap_btf_ids,
> +};
> +
> +static int __init bpf_idmap_kfunc_init(void)
> +{
> +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &idmap_kfunc_set);
> +}
> +
Is BPF_PROG_TYPE_TRACING sufficient for your use case ? It seems
BPF_PROG_TYPE_UNSPEC will make these kfuncs be available for all bpf
program types.
> +late_initcall(bpf_idmap_kfunc_init);
Christian Brauner July 4, 2023, 1:01 p.m. UTC | #2
On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> Hi,
> 
> On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > Since the introduction of idmapped mounts, file handling has become
> > somewhat more complicated. If the inode has been found through an
> > idmapped mount the idmap of the vfsmount must be used to get proper
> > i_uid / i_gid. This is important, for example, to correctly take into
> > account idmapped files when caching, LSM or for an audit.
> 
> Could you please add a bpf selftest for these newly added kfuncs ?
> >
> > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > ---
> >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 69 insertions(+)
> >
> > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > index 4905665c47d0..ba98ce26b883 100644
> > --- a/fs/mnt_idmapping.c
> > +++ b/fs/mnt_idmapping.c
> > @@ -6,6 +6,7 @@
> >  #include <linux/mnt_idmapping.h>
> >  #include <linux/slab.h>
> >  #include <linux/user_namespace.h>
> > +#include <linux/bpf.h>
> >  
> >  #include "internal.h"
> >  
> > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> >  		kfree(idmap);
> >  	}
> >  }
> > +
> > +__diag_push();
> > +__diag_ignore_all("-Wmissing-prototypes",
> > +		  "Global functions as their definitions will be in vmlinux BTF");
> > +
> > +/**
> > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > + * @mnt: the mount to check
> > + *
> > + * Return: true if mount is mapped, false if not.
> > + */
> > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > +{
> > +	return is_idmapped_mnt(mnt);
> > +}
> > +
> > +/**
> > + * bpf_file_mnt_idmap - get file idmapping
> > + * @file: the file from which to get mapping
> > + *
> > + * Return: The idmap for the @file.
> > + */
> > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > +{
> > +	return file_mnt_idmap(file);
> > +}
> 
> A dummy question here: the implementation of file_mnt_idmap() is
> file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> there any reason why we could not do such dereference directly in bpf
> program ?
> > +
> > +/**
> > + * bpf_inode_into_vfs_ids - map an inode's i_uid and i_gid down according to an idmapping
> > + * @idmap: idmap of the mount the inode was found from
> > + * @inode: inode to map
> > + *
> > + * The inode's i_uid and i_gid mapped down according to @idmap. If the inode's
> > + * i_uid or i_gid has no mapping INVALID_VFSUID or INVALID_VFSGID is returned in
> > + * the corresponding position.
> > + *
> > + * Return: A 64-bit integer containing the current GID and UID, and created as
> > + * such: *gid* **<< 32 \|** *uid*.
> > + */
> > +__bpf_kfunc uint64_t bpf_inode_into_vfs_ids(struct mnt_idmap *idmap,
> > +		const struct inode *inode)
> > +{
> > +	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
> > +	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
> > +
> > +	return (u64) __vfsgid_val(vfsgid) << 32 |
> > +		     __vfsuid_val(vfsuid);
> > +}
> > +
> > +__diag_pop();
> > +
> > +BTF_SET8_START(idmap_btf_ids)
> > +BTF_ID_FLAGS(func, bpf_is_idmapped_mnt)
> > +BTF_ID_FLAGS(func, bpf_file_mnt_idmap)
> > +BTF_ID_FLAGS(func, bpf_inode_into_vfs_ids)
> > +BTF_SET8_END(idmap_btf_ids)
> > +
> > +static const struct btf_kfunc_id_set idmap_kfunc_set = {
> > +	.owner = THIS_MODULE,
> > +	.set   = &idmap_btf_ids,
> > +};
> > +
> > +static int __init bpf_idmap_kfunc_init(void)
> > +{
> > +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &idmap_kfunc_set);
> > +}
> > +
> Is BPF_PROG_TYPE_TRACING sufficient for your use case ? It seems
> BPF_PROG_TYPE_UNSPEC will make these kfuncs be available for all bpf
> program types.
> > +late_initcall(bpf_idmap_kfunc_init);
> 

I don't want any of these helpers as kfuncs as they are peeking deeply
into implementation details that we reserve to change. Specifically in
the light of:

    3. kfunc lifecycle expectations part b):

    "Unlike with regular kernel symbols, this is expected behavior for BPF
     symbols, and out-of-tree BPF programs that use kfuncs should be considered
     relevant to discussions and decisions around modifying and removing those
     kfuncs. The BPF community will take an active role in participating in
     upstream discussions when necessary to ensure that the perspectives of such
     users are taken into account."

That's too much stability for my taste for these helpers. The helpers
here exposed have been modified multiple times and once we wean off
idmapped mounts from user namespaces completely they will change again.
So I'm fine if they're traceable but not as kfuncs with any - even
minimal - stability guarantees.
Alexey Gladkov July 4, 2023, 3:05 p.m. UTC | #3
On Tue, Jul 04, 2023 at 03:01:21PM +0200, Christian Brauner wrote:
> On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> > Hi,
> > 
> > On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > > Since the introduction of idmapped mounts, file handling has become
> > > somewhat more complicated. If the inode has been found through an
> > > idmapped mount the idmap of the vfsmount must be used to get proper
> > > i_uid / i_gid. This is important, for example, to correctly take into
> > > account idmapped files when caching, LSM or for an audit.
> > 
> > Could you please add a bpf selftest for these newly added kfuncs ?
> > >
> > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > ---
> > >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 69 insertions(+)
> > >
> > > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > > index 4905665c47d0..ba98ce26b883 100644
> > > --- a/fs/mnt_idmapping.c
> > > +++ b/fs/mnt_idmapping.c
> > > @@ -6,6 +6,7 @@
> > >  #include <linux/mnt_idmapping.h>
> > >  #include <linux/slab.h>
> > >  #include <linux/user_namespace.h>
> > > +#include <linux/bpf.h>
> > >  
> > >  #include "internal.h"
> > >  
> > > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> > >  		kfree(idmap);
> > >  	}
> > >  }
> > > +
> > > +__diag_push();
> > > +__diag_ignore_all("-Wmissing-prototypes",
> > > +		  "Global functions as their definitions will be in vmlinux BTF");
> > > +
> > > +/**
> > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > + * @mnt: the mount to check
> > > + *
> > > + * Return: true if mount is mapped, false if not.
> > > + */
> > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > +{
> > > +	return is_idmapped_mnt(mnt);
> > > +}
> > > +
> > > +/**
> > > + * bpf_file_mnt_idmap - get file idmapping
> > > + * @file: the file from which to get mapping
> > > + *
> > > + * Return: The idmap for the @file.
> > > + */
> > > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > > +{
> > > +	return file_mnt_idmap(file);
> > > +}
> > 
> > A dummy question here: the implementation of file_mnt_idmap() is
> > file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> > there any reason why we could not do such dereference directly in bpf
> > program ?
> > > +
> > > +/**
> > > + * bpf_inode_into_vfs_ids - map an inode's i_uid and i_gid down according to an idmapping
> > > + * @idmap: idmap of the mount the inode was found from
> > > + * @inode: inode to map
> > > + *
> > > + * The inode's i_uid and i_gid mapped down according to @idmap. If the inode's
> > > + * i_uid or i_gid has no mapping INVALID_VFSUID or INVALID_VFSGID is returned in
> > > + * the corresponding position.
> > > + *
> > > + * Return: A 64-bit integer containing the current GID and UID, and created as
> > > + * such: *gid* **<< 32 \|** *uid*.
> > > + */
> > > +__bpf_kfunc uint64_t bpf_inode_into_vfs_ids(struct mnt_idmap *idmap,
> > > +		const struct inode *inode)
> > > +{
> > > +	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
> > > +	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
> > > +
> > > +	return (u64) __vfsgid_val(vfsgid) << 32 |
> > > +		     __vfsuid_val(vfsuid);
> > > +}
> > > +
> > > +__diag_pop();
> > > +
> > > +BTF_SET8_START(idmap_btf_ids)
> > > +BTF_ID_FLAGS(func, bpf_is_idmapped_mnt)
> > > +BTF_ID_FLAGS(func, bpf_file_mnt_idmap)
> > > +BTF_ID_FLAGS(func, bpf_inode_into_vfs_ids)
> > > +BTF_SET8_END(idmap_btf_ids)
> > > +
> > > +static const struct btf_kfunc_id_set idmap_kfunc_set = {
> > > +	.owner = THIS_MODULE,
> > > +	.set   = &idmap_btf_ids,
> > > +};
> > > +
> > > +static int __init bpf_idmap_kfunc_init(void)
> > > +{
> > > +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &idmap_kfunc_set);
> > > +}
> > > +
> > Is BPF_PROG_TYPE_TRACING sufficient for your use case ? It seems
> > BPF_PROG_TYPE_UNSPEC will make these kfuncs be available for all bpf
> > program types.
> > > +late_initcall(bpf_idmap_kfunc_init);
> > 
> 
> I don't want any of these helpers as kfuncs as they are peeking deeply
> into implementation details that we reserve to change. Specifically in
> the light of:
> 
>     3. kfunc lifecycle expectations part b):
> 
>     "Unlike with regular kernel symbols, this is expected behavior for BPF
>      symbols, and out-of-tree BPF programs that use kfuncs should be considered
>      relevant to discussions and decisions around modifying and removing those
>      kfuncs. The BPF community will take an active role in participating in
>      upstream discussions when necessary to ensure that the perspectives of such
>      users are taken into account."
> 
> That's too much stability for my taste for these helpers. The helpers
> here exposed have been modified multiple times and once we wean off
> idmapped mounts from user namespaces completely they will change again.
> So I'm fine if they're traceable but not as kfuncs with any - even
> minimal - stability guarantees.

I thought that the mnt_idmap interface is already quite stable. I wanted
to give a minimal interface to bpf programs.

Would it be better if I hide the mnt_idmap inside the helper or are you
against using kfuncs at the moment ?

Like that:

__bpf_kfunc uint64_t bpf_get_file_vfs_ids(struct file *file)
{
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, file->f_inode);
	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, file->f_inode);

	return (u64) __vfsgid_val(vfsgid) << 32 |
		     __vfsuid_val(vfsuid);
}
Alexey Gladkov July 4, 2023, 3:11 p.m. UTC | #4
On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> Hi,
> 
> On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > Since the introduction of idmapped mounts, file handling has become
> > somewhat more complicated. If the inode has been found through an
> > idmapped mount the idmap of the vfsmount must be used to get proper
> > i_uid / i_gid. This is important, for example, to correctly take into
> > account idmapped files when caching, LSM or for an audit.
> 
> Could you please add a bpf selftest for these newly added kfuncs ?
> >
> > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > ---
> >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 69 insertions(+)
> >
> > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > index 4905665c47d0..ba98ce26b883 100644
> > --- a/fs/mnt_idmapping.c
> > +++ b/fs/mnt_idmapping.c
> > @@ -6,6 +6,7 @@
> >  #include <linux/mnt_idmapping.h>
> >  #include <linux/slab.h>
> >  #include <linux/user_namespace.h>
> > +#include <linux/bpf.h>
> >  
> >  #include "internal.h"
> >  
> > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> >  		kfree(idmap);
> >  	}
> >  }
> > +
> > +__diag_push();
> > +__diag_ignore_all("-Wmissing-prototypes",
> > +		  "Global functions as their definitions will be in vmlinux BTF");
> > +
> > +/**
> > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > + * @mnt: the mount to check
> > + *
> > + * Return: true if mount is mapped, false if not.
> > + */
> > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > +{
> > +	return is_idmapped_mnt(mnt);
> > +}
> > +
> > +/**
> > + * bpf_file_mnt_idmap - get file idmapping
> > + * @file: the file from which to get mapping
> > + *
> > + * Return: The idmap for the @file.
> > + */
> > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > +{
> > +	return file_mnt_idmap(file);
> > +}
> 
> A dummy question here: the implementation of file_mnt_idmap() is
> file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> there any reason why we could not do such dereference directly in bpf
> program ?

I wanted to provide a minimal API for bpf programs. I thought that this
interface is stable enough, but after reading Christian's answer, it looks
like I was wrong.

> > +
> > +/**
> > + * bpf_inode_into_vfs_ids - map an inode's i_uid and i_gid down according to an idmapping
> > + * @idmap: idmap of the mount the inode was found from
> > + * @inode: inode to map
> > + *
> > + * The inode's i_uid and i_gid mapped down according to @idmap. If the inode's
> > + * i_uid or i_gid has no mapping INVALID_VFSUID or INVALID_VFSGID is returned in
> > + * the corresponding position.
> > + *
> > + * Return: A 64-bit integer containing the current GID and UID, and created as
> > + * such: *gid* **<< 32 \|** *uid*.
> > + */
> > +__bpf_kfunc uint64_t bpf_inode_into_vfs_ids(struct mnt_idmap *idmap,
> > +		const struct inode *inode)
> > +{
> > +	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
> > +	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
> > +
> > +	return (u64) __vfsgid_val(vfsgid) << 32 |
> > +		     __vfsuid_val(vfsuid);
> > +}
> > +
> > +__diag_pop();
> > +
> > +BTF_SET8_START(idmap_btf_ids)
> > +BTF_ID_FLAGS(func, bpf_is_idmapped_mnt)
> > +BTF_ID_FLAGS(func, bpf_file_mnt_idmap)
> > +BTF_ID_FLAGS(func, bpf_inode_into_vfs_ids)
> > +BTF_SET8_END(idmap_btf_ids)
> > +
> > +static const struct btf_kfunc_id_set idmap_kfunc_set = {
> > +	.owner = THIS_MODULE,
> > +	.set   = &idmap_btf_ids,
> > +};
> > +
> > +static int __init bpf_idmap_kfunc_init(void)
> > +{
> > +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &idmap_kfunc_set);
> > +}
> > +
> Is BPF_PROG_TYPE_TRACING sufficient for your use case ? It seems
> BPF_PROG_TYPE_UNSPEC will make these kfuncs be available for all bpf
> program types.

This can be used not only in BPF_PROG_TYPE_TRACING but also at least for
BPF_PROG_TYPE_LSM.

> > +late_initcall(bpf_idmap_kfunc_init);
> 
>
Christian Brauner July 4, 2023, 3:28 p.m. UTC | #5
On Tue, Jul 04, 2023 at 05:11:12PM +0200, Alexey Gladkov wrote:
> On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> > Hi,
> > 
> > On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > > Since the introduction of idmapped mounts, file handling has become
> > > somewhat more complicated. If the inode has been found through an
> > > idmapped mount the idmap of the vfsmount must be used to get proper
> > > i_uid / i_gid. This is important, for example, to correctly take into
> > > account idmapped files when caching, LSM or for an audit.
> > 
> > Could you please add a bpf selftest for these newly added kfuncs ?
> > >
> > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > ---
> > >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 69 insertions(+)
> > >
> > > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > > index 4905665c47d0..ba98ce26b883 100644
> > > --- a/fs/mnt_idmapping.c
> > > +++ b/fs/mnt_idmapping.c
> > > @@ -6,6 +6,7 @@
> > >  #include <linux/mnt_idmapping.h>
> > >  #include <linux/slab.h>
> > >  #include <linux/user_namespace.h>
> > > +#include <linux/bpf.h>
> > >  
> > >  #include "internal.h"
> > >  
> > > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> > >  		kfree(idmap);
> > >  	}
> > >  }
> > > +
> > > +__diag_push();
> > > +__diag_ignore_all("-Wmissing-prototypes",
> > > +		  "Global functions as their definitions will be in vmlinux BTF");
> > > +
> > > +/**
> > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > + * @mnt: the mount to check
> > > + *
> > > + * Return: true if mount is mapped, false if not.
> > > + */
> > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > +{
> > > +	return is_idmapped_mnt(mnt);
> > > +}
> > > +
> > > +/**
> > > + * bpf_file_mnt_idmap - get file idmapping
> > > + * @file: the file from which to get mapping
> > > + *
> > > + * Return: The idmap for the @file.
> > > + */
> > > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > > +{
> > > +	return file_mnt_idmap(file);
> > > +}
> > 
> > A dummy question here: the implementation of file_mnt_idmap() is
> > file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> > there any reason why we could not do such dereference directly in bpf
> > program ?
> 
> I wanted to provide a minimal API for bpf programs. I thought that this
> interface is stable enough, but after reading Christian's answer, it looks
> like I was wrong.

It isn't even about stability per se. It's unlikely that if we change
internal details that types or arguments to these helpers change. That's
why we did the work of abstracting this all away in the first place and
making this an opaque type.

The wider point is that according to the docs, kfuncs claim to have
equivalent status to EXPORT_SYMBOL_*() with the added complexity of
maybe having to take out of tree bpf programs into account.

Right now, we can look at the in-kernel users of is_idmapped_mnt(),
convert them and then kill this thing off if we wanted to. As soon as
this is a kfunc such an endeavour becomes a measure of "f**** around and
find out". That's an entirely avoidable conflict if we don't even expose
it in the first place.
Alexey Gladkov July 5, 2023, 1:43 p.m. UTC | #6
On Tue, Jul 04, 2023 at 05:28:13PM +0200, Christian Brauner wrote:
> On Tue, Jul 04, 2023 at 05:11:12PM +0200, Alexey Gladkov wrote:
> > On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> > > Hi,
> > > 
> > > On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > > > Since the introduction of idmapped mounts, file handling has become
> > > > somewhat more complicated. If the inode has been found through an
> > > > idmapped mount the idmap of the vfsmount must be used to get proper
> > > > i_uid / i_gid. This is important, for example, to correctly take into
> > > > account idmapped files when caching, LSM or for an audit.
> > > 
> > > Could you please add a bpf selftest for these newly added kfuncs ?
> > > >
> > > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > > ---
> > > >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 69 insertions(+)
> > > >
> > > > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > > > index 4905665c47d0..ba98ce26b883 100644
> > > > --- a/fs/mnt_idmapping.c
> > > > +++ b/fs/mnt_idmapping.c
> > > > @@ -6,6 +6,7 @@
> > > >  #include <linux/mnt_idmapping.h>
> > > >  #include <linux/slab.h>
> > > >  #include <linux/user_namespace.h>
> > > > +#include <linux/bpf.h>
> > > >  
> > > >  #include "internal.h"
> > > >  
> > > > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> > > >  		kfree(idmap);
> > > >  	}
> > > >  }
> > > > +
> > > > +__diag_push();
> > > > +__diag_ignore_all("-Wmissing-prototypes",
> > > > +		  "Global functions as their definitions will be in vmlinux BTF");
> > > > +
> > > > +/**
> > > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > > + * @mnt: the mount to check
> > > > + *
> > > > + * Return: true if mount is mapped, false if not.
> > > > + */
> > > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > > +{
> > > > +	return is_idmapped_mnt(mnt);
> > > > +}
> > > > +
> > > > +/**
> > > > + * bpf_file_mnt_idmap - get file idmapping
> > > > + * @file: the file from which to get mapping
> > > > + *
> > > > + * Return: The idmap for the @file.
> > > > + */
> > > > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > > > +{
> > > > +	return file_mnt_idmap(file);
> > > > +}
> > > 
> > > A dummy question here: the implementation of file_mnt_idmap() is
> > > file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> > > there any reason why we could not do such dereference directly in bpf
> > > program ?
> > 
> > I wanted to provide a minimal API for bpf programs. I thought that this
> > interface is stable enough, but after reading Christian's answer, it looks
> > like I was wrong.
> 
> It isn't even about stability per se. It's unlikely that if we change
> internal details that types or arguments to these helpers change. That's
> why we did the work of abstracting this all away in the first place and
> making this an opaque type.
> 
> The wider point is that according to the docs, kfuncs claim to have
> equivalent status to EXPORT_SYMBOL_*() with the added complexity of
> maybe having to take out of tree bpf programs into account.
> 
> Right now, we can look at the in-kernel users of is_idmapped_mnt(),
> convert them and then kill this thing off if we wanted to. As soon as
> this is a kfunc such an endeavour becomes a measure of "f**** around and
> find out". That's an entirely avoidable conflict if we don't even expose
> it in the first place.
> 

I was hoping to make it possible to use is_idmapped_mnt or its equivalent
to at least be able to distinguish a file with an idmapped mount from a
regular one.
Christian Brauner July 5, 2023, 2:18 p.m. UTC | #7
On Wed, Jul 05, 2023 at 03:43:09PM +0200, Alexey Gladkov wrote:
> On Tue, Jul 04, 2023 at 05:28:13PM +0200, Christian Brauner wrote:
> > On Tue, Jul 04, 2023 at 05:11:12PM +0200, Alexey Gladkov wrote:
> > > On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> > > > Hi,
> > > > 
> > > > On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > > > > Since the introduction of idmapped mounts, file handling has become
> > > > > somewhat more complicated. If the inode has been found through an
> > > > > idmapped mount the idmap of the vfsmount must be used to get proper
> > > > > i_uid / i_gid. This is important, for example, to correctly take into
> > > > > account idmapped files when caching, LSM or for an audit.
> > > > 
> > > > Could you please add a bpf selftest for these newly added kfuncs ?
> > > > >
> > > > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > > > ---
> > > > >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 69 insertions(+)
> > > > >
> > > > > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > > > > index 4905665c47d0..ba98ce26b883 100644
> > > > > --- a/fs/mnt_idmapping.c
> > > > > +++ b/fs/mnt_idmapping.c
> > > > > @@ -6,6 +6,7 @@
> > > > >  #include <linux/mnt_idmapping.h>
> > > > >  #include <linux/slab.h>
> > > > >  #include <linux/user_namespace.h>
> > > > > +#include <linux/bpf.h>
> > > > >  
> > > > >  #include "internal.h"
> > > > >  
> > > > > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> > > > >  		kfree(idmap);
> > > > >  	}
> > > > >  }
> > > > > +
> > > > > +__diag_push();
> > > > > +__diag_ignore_all("-Wmissing-prototypes",
> > > > > +		  "Global functions as their definitions will be in vmlinux BTF");
> > > > > +
> > > > > +/**
> > > > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > > > + * @mnt: the mount to check
> > > > > + *
> > > > > + * Return: true if mount is mapped, false if not.
> > > > > + */
> > > > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > > > +{
> > > > > +	return is_idmapped_mnt(mnt);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * bpf_file_mnt_idmap - get file idmapping
> > > > > + * @file: the file from which to get mapping
> > > > > + *
> > > > > + * Return: The idmap for the @file.
> > > > > + */
> > > > > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > > > > +{
> > > > > +	return file_mnt_idmap(file);
> > > > > +}
> > > > 
> > > > A dummy question here: the implementation of file_mnt_idmap() is
> > > > file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> > > > there any reason why we could not do such dereference directly in bpf
> > > > program ?
> > > 
> > > I wanted to provide a minimal API for bpf programs. I thought that this
> > > interface is stable enough, but after reading Christian's answer, it looks
> > > like I was wrong.
> > 
> > It isn't even about stability per se. It's unlikely that if we change
> > internal details that types or arguments to these helpers change. That's
> > why we did the work of abstracting this all away in the first place and
> > making this an opaque type.
> > 
> > The wider point is that according to the docs, kfuncs claim to have
> > equivalent status to EXPORT_SYMBOL_*() with the added complexity of
> > maybe having to take out of tree bpf programs into account.
> > 
> > Right now, we can look at the in-kernel users of is_idmapped_mnt(),
> > convert them and then kill this thing off if we wanted to. As soon as
> > this is a kfunc such an endeavour becomes a measure of "f**** around and
> > find out". That's an entirely avoidable conflict if we don't even expose
> > it in the first place.
> > 
> 
> I was hoping to make it possible to use is_idmapped_mnt or its equivalent
> to at least be able to distinguish a file with an idmapped mount from a
> regular one.

Afaict, you can do this today pretty easily. For example,

#!/usr/bin/env bpftrace

#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dcache.h>

kfunc:do_move_mount
{
        printf("Target path           %s\n", str(args->new_path->dentry->d_name.name));
        printf("Target mount idmapped %d\n", args->new_path->mnt->mnt_idmap != kaddr("nop_mnt_idmap"));
}

sample output:

Target path           console
Target mount idmapped 0
Target path           rootfs
Target mount idmapped 1
Alexey Gladkov July 5, 2023, 3:28 p.m. UTC | #8
On Wed, Jul 05, 2023 at 04:18:07PM +0200, Christian Brauner wrote:
> On Wed, Jul 05, 2023 at 03:43:09PM +0200, Alexey Gladkov wrote:
> > On Tue, Jul 04, 2023 at 05:28:13PM +0200, Christian Brauner wrote:
> > > On Tue, Jul 04, 2023 at 05:11:12PM +0200, Alexey Gladkov wrote:
> > > > On Tue, Jul 04, 2023 at 07:42:53PM +0800, Hou Tao wrote:
> > > > > Hi,
> > > > > 
> > > > > On 6/30/2023 7:08 PM, Alexey Gladkov wrote:
> > > > > > Since the introduction of idmapped mounts, file handling has become
> > > > > > somewhat more complicated. If the inode has been found through an
> > > > > > idmapped mount the idmap of the vfsmount must be used to get proper
> > > > > > i_uid / i_gid. This is important, for example, to correctly take into
> > > > > > account idmapped files when caching, LSM or for an audit.
> > > > > 
> > > > > Could you please add a bpf selftest for these newly added kfuncs ?
> > > > > >
> > > > > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > > > > ---
> > > > > >  fs/mnt_idmapping.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 69 insertions(+)
> > > > > >
> > > > > > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> > > > > > index 4905665c47d0..ba98ce26b883 100644
> > > > > > --- a/fs/mnt_idmapping.c
> > > > > > +++ b/fs/mnt_idmapping.c
> > > > > > @@ -6,6 +6,7 @@
> > > > > >  #include <linux/mnt_idmapping.h>
> > > > > >  #include <linux/slab.h>
> > > > > >  #include <linux/user_namespace.h>
> > > > > > +#include <linux/bpf.h>
> > > > > >  
> > > > > >  #include "internal.h"
> > > > > >  
> > > > > > @@ -271,3 +272,71 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
> > > > > >  		kfree(idmap);
> > > > > >  	}
> > > > > >  }
> > > > > > +
> > > > > > +__diag_push();
> > > > > > +__diag_ignore_all("-Wmissing-prototypes",
> > > > > > +		  "Global functions as their definitions will be in vmlinux BTF");
> > > > > > +
> > > > > > +/**
> > > > > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > > > > + * @mnt: the mount to check
> > > > > > + *
> > > > > > + * Return: true if mount is mapped, false if not.
> > > > > > + */
> > > > > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > > > > +{
> > > > > > +	return is_idmapped_mnt(mnt);
> > > > > > +}
> > > > > > +
> > > > > > +/**
> > > > > > + * bpf_file_mnt_idmap - get file idmapping
> > > > > > + * @file: the file from which to get mapping
> > > > > > + *
> > > > > > + * Return: The idmap for the @file.
> > > > > > + */
> > > > > > +__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
> > > > > > +{
> > > > > > +	return file_mnt_idmap(file);
> > > > > > +}
> > > > > 
> > > > > A dummy question here: the implementation of file_mnt_idmap() is
> > > > > file->f_path.mnt->mnt_idmap, so if the passed file is a BTF pointer, is
> > > > > there any reason why we could not do such dereference directly in bpf
> > > > > program ?
> > > > 
> > > > I wanted to provide a minimal API for bpf programs. I thought that this
> > > > interface is stable enough, but after reading Christian's answer, it looks
> > > > like I was wrong.
> > > 
> > > It isn't even about stability per se. It's unlikely that if we change
> > > internal details that types or arguments to these helpers change. That's
> > > why we did the work of abstracting this all away in the first place and
> > > making this an opaque type.
> > > 
> > > The wider point is that according to the docs, kfuncs claim to have
> > > equivalent status to EXPORT_SYMBOL_*() with the added complexity of
> > > maybe having to take out of tree bpf programs into account.
> > > 
> > > Right now, we can look at the in-kernel users of is_idmapped_mnt(),
> > > convert them and then kill this thing off if we wanted to. As soon as
> > > this is a kfunc such an endeavour becomes a measure of "f**** around and
> > > find out". That's an entirely avoidable conflict if we don't even expose
> > > it in the first place.
> > > 
> > 
> > I was hoping to make it possible to use is_idmapped_mnt or its equivalent
> > to at least be able to distinguish a file with an idmapped mount from a
> > regular one.
> 
> Afaict, you can do this today pretty easily. For example,
> 
> #!/usr/bin/env bpftrace
> 
> #include <linux/mount.h>
> #include <linux/path.h>
> #include <linux/dcache.h>
> 
> kfunc:do_move_mount
> {
>         printf("Target path           %s\n", str(args->new_path->dentry->d_name.name));
>         printf("Target mount idmapped %d\n", args->new_path->mnt->mnt_idmap != kaddr("nop_mnt_idmap"));
> }
> 
> sample output:
> 
> Target path           console
> Target mount idmapped 0
> Target path           rootfs
> Target mount idmapped 1
> 

Well, it's a possible solution, but in this case we don't limit the bpf
programs in hacking. But on the other hand it will be only their problem.

Since this is the current strategy, this is suitable for me.
Alexei Starovoitov July 6, 2023, 1:10 a.m. UTC | #9
On Tue, Jul 4, 2023 at 6:01 AM Christian Brauner <brauner@kernel.org> wrote:
>
> > > +/**
> > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > + * @mnt: the mount to check
> > > + *
> > > + * Return: true if mount is mapped, false if not.
> > > + */
> > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > +{
> > > +   return is_idmapped_mnt(mnt);
> > > +}
...
>
> I don't want any of these helpers as kfuncs as they are peeking deeply
> into implementation details that we reserve to change. Specifically in
> the light of:
>
>     3. kfunc lifecycle expectations part b):
>
>     "Unlike with regular kernel symbols, this is expected behavior for BPF
>      symbols, and out-of-tree BPF programs that use kfuncs should be considered
>      relevant to discussions and decisions around modifying and removing those
>      kfuncs. The BPF community will take an active role in participating in
>      upstream discussions when necessary to ensure that the perspectives of such
>      users are taken into account."
>
> That's too much stability for my taste for these helpers. The helpers
> here exposed have been modified multiple times and once we wean off
> idmapped mounts from user namespaces completely they will change again.
> So I'm fine if they're traceable but not as kfuncs with any - even
> minimal - stability guarantees.

Christian,
That quote is taken out of context.
In the first place the Documentation/bpf/kfuncs.rst says:
"
kfuncs provide a kernel <-> kernel API, and thus are not bound by any of the
strict stability restrictions associated with kernel <-> user UAPIs. This means
they can be thought of as similar to EXPORT_SYMBOL_GPL, and can therefore be
modified or removed by a maintainer of the subsystem they're defined in when
it's deemed necessary.
"

bpf_get_file_vfs_ids is vfs related, so you guys decide when and how
to add/remove them. It's ok that you don't want this particular one
for whatever reason, but that reason shouldn't be 'stability guarantees'.
There are really none. The kernel kfuncs can change at any time.
There are plenty of examples in git log where we added and then
tweaked/removed kfuncs.

The doc also says:
"
As described above, while sometimes a maintainer may find that a kfunc must be
changed or removed immediately to accommodate some changes in their subsystem,
"
and git log of such cases proves the point.

The quote about out-of-tree bpf progs is necessary today, since
very few bpf progs are in-tree, so when maintainers of a subsystem
want to remove kfunc the program authors need something in the doc
to point to and explain why and how they use the kfunc otherwise
maintainers will just say 'go away. you're out-of-tree'.
The users need their voice to be heard. Even if the result is the same.
In other words the part you quoted is needed to make kfuncs usable.
Otherwise 'kfunc is 100% unstable and maintainers can rename it
every release just to make life of bpf prog writers harder'
becomes a real possibility in the minds of bpf users.
The kfunc doc makes it 100% clear that there are no stability guarantees.
So please don't say 'minimal stability'.

In your other reply:

> we can look at the in-kernel users of is_idmapped_mnt(),
> convert them and then kill this thing off if we wanted to.

you can absolutely do that even if is_idmapped_mnt() is exposed as a kfunc.
You'll just delete it with zero notice if you like.
Just like what you would do with a normal export_symbol.
The doc is pretty clear about it and there are examples where we did
such things.
Christian Brauner July 6, 2023, 7:22 a.m. UTC | #10
On Wed, Jul 05, 2023 at 06:10:32PM -0700, Alexei Starovoitov wrote:
> On Tue, Jul 4, 2023 at 6:01 AM Christian Brauner <brauner@kernel.org> wrote:
> >
> > > > +/**
> > > > + * bpf_is_idmapped_mnt - check whether a mount is idmapped
> > > > + * @mnt: the mount to check
> > > > + *
> > > > + * Return: true if mount is mapped, false if not.
> > > > + */
> > > > +__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
> > > > +{
> > > > +   return is_idmapped_mnt(mnt);
> > > > +}
> ...
> >
> > I don't want any of these helpers as kfuncs as they are peeking deeply
> > into implementation details that we reserve to change. Specifically in
> > the light of:
> >
> >     3. kfunc lifecycle expectations part b):
> >
> >     "Unlike with regular kernel symbols, this is expected behavior for BPF
> >      symbols, and out-of-tree BPF programs that use kfuncs should be considered
> >      relevant to discussions and decisions around modifying and removing those
> >      kfuncs. The BPF community will take an active role in participating in
> >      upstream discussions when necessary to ensure that the perspectives of such
> >      users are taken into account."
> >
> > That's too much stability for my taste for these helpers. The helpers
> > here exposed have been modified multiple times and once we wean off
> > idmapped mounts from user namespaces completely they will change again.
> > So I'm fine if they're traceable but not as kfuncs with any - even
> > minimal - stability guarantees.
> 
> Christian,
> That quote is taken out of context.
> In the first place the Documentation/bpf/kfuncs.rst says:
> "
> kfuncs provide a kernel <-> kernel API, and thus are not bound by any of the
> strict stability restrictions associated with kernel <-> user UAPIs. This means
> they can be thought of as similar to EXPORT_SYMBOL_GPL, and can therefore be
> modified or removed by a maintainer of the subsystem they're defined in when
> it's deemed necessary.
> "
> 
> bpf_get_file_vfs_ids is vfs related, so you guys decide when and how
> to add/remove them. It's ok that you don't want this particular one
> for whatever reason, but that reason shouldn't be 'stability guarantees'.
> There are really none. The kernel kfuncs can change at any time.
> There are plenty of examples in git log where we added and then
> tweaked/removed kfuncs.
> 
> The doc also says:
> "
> As described above, while sometimes a maintainer may find that a kfunc must be
> changed or removed immediately to accommodate some changes in their subsystem,
> "
> and git log of such cases proves the point.
> 
> The quote about out-of-tree bpf progs is necessary today, since
> very few bpf progs are in-tree, so when maintainers of a subsystem
> want to remove kfunc the program authors need something in the doc
> to point to and explain why and how they use the kfunc otherwise
> maintainers will just say 'go away. you're out-of-tree'.
> The users need their voice to be heard. Even if the result is the same.
> In other words the part you quoted is needed to make kfuncs usable.
> Otherwise 'kfunc is 100% unstable and maintainers can rename it
> every release just to make life of bpf prog writers harder'
> becomes a real possibility in the minds of bpf users.
> The kfunc doc makes it 100% clear that there are no stability guarantees.
> So please don't say 'minimal stability'.
> 
> In your other reply:
> 
> > we can look at the in-kernel users of is_idmapped_mnt(),
> > convert them and then kill this thing off if we wanted to.
> 
> you can absolutely do that even if is_idmapped_mnt() is exposed as a kfunc.
> You'll just delete it with zero notice if you like.
> Just like what you would do with a normal export_symbol.
> The doc is pretty clear about it and there are examples where we did
> such things.

I think I said it somewhere else: I'm not opposing your position on
kfruncs in a sense I understand that's kinda the model that you have to
push for. But you have to admit that this out-of-tree portion is very
hard to swallow if you've been hit by out of tree modules and their
complaints about removed EXPORT_SYMBOL*()s.

I'm still rather hesitant about this because I find it hard to figure
out how this will go down in practice. But, especially with the internal
idmapped mount api. This is a very hidden and abstracted away
implementation around an opaque type and I'm not yet ready to let
modules or bpf programs peek into it's implementation details. I hope
that's understandable.
Christoph Hellwig July 6, 2023, 2:37 p.m. UTC | #11
On Tue, Jul 04, 2023 at 03:01:21PM +0200, Christian Brauner wrote:
> That's too much stability for my taste for these helpers. The helpers
> here exposed have been modified multiple times and once we wean off
> idmapped mounts from user namespaces completely they will change again.
> So I'm fine if they're traceable but not as kfuncs with any - even
> minimal - stability guarantees.

I fully agree.  I also don't think any eBPF program has any business
looking at idmapping.
Alexei Starovoitov July 7, 2023, 1:04 a.m. UTC | #12
On Thu, Jul 6, 2023 at 12:22 AM Christian Brauner <brauner@kernel.org> wrote:
> push for. But you have to admit that this out-of-tree portion is very
> hard to swallow if you've been hit by out of tree modules and their
> complaints about removed EXPORT_SYMBOL*()s.

I don't remember a single case where somebody complained so hard
about unexport of a symbol that it was reinstated.
Instead there are plenty of 'unexport foo' in every kernel release.
Like commit 4bb218a65a43 ("fs: unexport buffer_check_dirty_writeback").
Surely they break some oot mods, so what? Complaining doesn't help.
diff mbox series

Patch

diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 4905665c47d0..ba98ce26b883 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -6,6 +6,7 @@ 
 #include <linux/mnt_idmapping.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/bpf.h>
 
 #include "internal.h"
 
@@ -271,3 +272,71 @@  void mnt_idmap_put(struct mnt_idmap *idmap)
 		kfree(idmap);
 	}
 }
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+/**
+ * bpf_is_idmapped_mnt - check whether a mount is idmapped
+ * @mnt: the mount to check
+ *
+ * Return: true if mount is mapped, false if not.
+ */
+__bpf_kfunc bool bpf_is_idmapped_mnt(struct vfsmount *mnt)
+{
+	return is_idmapped_mnt(mnt);
+}
+
+/**
+ * bpf_file_mnt_idmap - get file idmapping
+ * @file: the file from which to get mapping
+ *
+ * Return: The idmap for the @file.
+ */
+__bpf_kfunc struct mnt_idmap *bpf_file_mnt_idmap(struct file *file)
+{
+	return file_mnt_idmap(file);
+}
+
+/**
+ * bpf_inode_into_vfs_ids - map an inode's i_uid and i_gid down according to an idmapping
+ * @idmap: idmap of the mount the inode was found from
+ * @inode: inode to map
+ *
+ * The inode's i_uid and i_gid mapped down according to @idmap. If the inode's
+ * i_uid or i_gid has no mapping INVALID_VFSUID or INVALID_VFSGID is returned in
+ * the corresponding position.
+ *
+ * Return: A 64-bit integer containing the current GID and UID, and created as
+ * such: *gid* **<< 32 \|** *uid*.
+ */
+__bpf_kfunc uint64_t bpf_inode_into_vfs_ids(struct mnt_idmap *idmap,
+		const struct inode *inode)
+{
+	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
+	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
+
+	return (u64) __vfsgid_val(vfsgid) << 32 |
+		     __vfsuid_val(vfsuid);
+}
+
+__diag_pop();
+
+BTF_SET8_START(idmap_btf_ids)
+BTF_ID_FLAGS(func, bpf_is_idmapped_mnt)
+BTF_ID_FLAGS(func, bpf_file_mnt_idmap)
+BTF_ID_FLAGS(func, bpf_inode_into_vfs_ids)
+BTF_SET8_END(idmap_btf_ids)
+
+static const struct btf_kfunc_id_set idmap_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &idmap_btf_ids,
+};
+
+static int __init bpf_idmap_kfunc_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &idmap_kfunc_set);
+}
+
+late_initcall(bpf_idmap_kfunc_init);