diff mbox series

[bpf-next,2/2] selftests/bpf: Add tests for bpf_get_dentry_xattr

Message ID 20240725234706.655613-3-song@kernel.org (mailing list archive)
State New
Headers show
Series Add kfuncs to support reading xattr from dentry | expand

Commit Message

Song Liu July 25, 2024, 11:47 p.m. UTC
1. Rename fs_kfuncs/xattr to fs_kfuncs/file_xattr and add a call of
   bpf_get_dentry_xattr() to the test.
2. Add a new sub test fs_kfuncs/dentry_xattr, which checks 3 levels of
   parent directories for xattr. This demonstrate the use case that
   a xattr on a directory is used to tag all files in the directory and
   sub directories.

Signed-off-by: Song Liu <song@kernel.org>
---
 .../selftests/bpf/prog_tests/fs_kfuncs.c      | 61 +++++++++++++++++--
 .../selftests/bpf/progs/test_dentry_xattr.c   | 46 ++++++++++++++
 .../selftests/bpf/progs/test_get_xattr.c      | 16 ++++-
 3 files changed, 117 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_dentry_xattr.c

Comments

Christian Brauner July 26, 2024, 7:06 a.m. UTC | #1
On Thu, Jul 25, 2024 at 04:47:06PM GMT, Song Liu wrote:
> 1. Rename fs_kfuncs/xattr to fs_kfuncs/file_xattr and add a call of
>    bpf_get_dentry_xattr() to the test.
> 2. Add a new sub test fs_kfuncs/dentry_xattr, which checks 3 levels of
>    parent directories for xattr. This demonstrate the use case that
>    a xattr on a directory is used to tag all files in the directory and
>    sub directories.
> 
> Signed-off-by: Song Liu <song@kernel.org>
> ---
>  .../selftests/bpf/prog_tests/fs_kfuncs.c      | 61 +++++++++++++++++--
>  .../selftests/bpf/progs/test_dentry_xattr.c   | 46 ++++++++++++++
>  .../selftests/bpf/progs/test_get_xattr.c      | 16 ++++-
>  3 files changed, 117 insertions(+), 6 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/test_dentry_xattr.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
> index 37056ba73847..a960cfbe8907 100644
> --- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
> +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
> @@ -2,17 +2,19 @@
>  /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
>  
>  #include <stdlib.h>
> +#include <sys/stat.h>
>  #include <sys/types.h>
>  #include <sys/xattr.h>
>  #include <linux/fsverity.h>
>  #include <unistd.h>
>  #include <test_progs.h>
>  #include "test_get_xattr.skel.h"
> +#include "test_dentry_xattr.skel.h"
>  #include "test_fsverity.skel.h"
>  
>  static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
>  
> -static void test_xattr(void)
> +static void test_file_xattr(void)
>  {
>  	struct test_get_xattr *skel = NULL;
>  	int fd = -1, err;
> @@ -50,7 +52,8 @@ static void test_xattr(void)
>  	if (!ASSERT_GE(fd, 0, "open_file"))
>  		goto out;
>  
> -	ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr");
> +	ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
> +	ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
>  
>  out:
>  	close(fd);
> @@ -58,6 +61,53 @@ static void test_xattr(void)
>  	remove(testfile);
>  }
>  
> +static void test_directory_xattr(void)
> +{
> +	struct test_dentry_xattr *skel = NULL;
> +	static const char * const paths[] = {
> +		"/tmp/a",
> +		"/tmp/a/b",
> +		"/tmp/a/b/c",
> +	};
> +	const char *file = "/tmp/a/b/c/d";
> +	int i, j, err, fd;
> +
> +	for (i = 0; i < sizeof(paths) / sizeof(char *); i++) {
> +		err = mkdir(paths[i], 0755);
> +		if (!ASSERT_OK(err, "mkdir"))
> +			goto out;
> +		err = setxattr(paths[i], "user.kfunc", "hello", sizeof("hello"), 0);
> +		if (!ASSERT_OK(err, "setxattr")) {
> +			i++;
> +			goto out;
> +		}
> +	}
> +
> +	skel = test_dentry_xattr__open_and_load();
> +
> +	if (!ASSERT_OK_PTR(skel, "test_dentry_xattr__open_and_load"))
> +		goto out;
> +
> +	skel->bss->monitored_pid = getpid();
> +	err = test_dentry_xattr__attach(skel);
> +
> +	if (!ASSERT_OK(err, "test_dentry__xattr__attach"))
> +		goto out;
> +
> +	fd = open(file, O_CREAT | O_RDONLY, 0644);
> +	if (!ASSERT_GE(fd, 0, "open_file"))
> +		goto out;
> +
> +	ASSERT_EQ(skel->bss->number_of_xattr_found, 3, "number_of_xattr_found");
> +	close(fd);
> +out:
> +	test_dentry_xattr__destroy(skel);
> +	remove(file);
> +	for (j = i - 1; j >= 0; j--)
> +		rmdir(paths[j]);
> +}
> +
> +
>  #ifndef SHA256_DIGEST_SIZE
>  #define SHA256_DIGEST_SIZE      32
>  #endif
> @@ -134,8 +184,11 @@ static void test_fsverity(void)
>  
>  void test_fs_kfuncs(void)
>  {
> -	if (test__start_subtest("xattr"))
> -		test_xattr();
> +	if (test__start_subtest("file_xattr"))
> +		test_file_xattr();
> +
> +	if (test__start_subtest("dentry_xattr"))
> +		test_directory_xattr();
>  
>  	if (test__start_subtest("fsverity"))
>  		test_fsverity();
> diff --git a/tools/testing/selftests/bpf/progs/test_dentry_xattr.c b/tools/testing/selftests/bpf/progs/test_dentry_xattr.c
> new file mode 100644
> index 000000000000..d2e378b2e2d5
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_dentry_xattr.c
> @@ -0,0 +1,46 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_tracing.h>
> +#include "bpf_kfuncs.h"
> +
> +char _license[] SEC("license") = "GPL";
> +
> +__u32 monitored_pid;
> +__u32 number_of_xattr_found;
> +
> +static const char expected_value[] = "hello";
> +char value[32];
> +
> +SEC("lsm.s/file_open")
> +int BPF_PROG(test_file_open, struct file *f)
> +{
> +	struct bpf_dynptr value_ptr;
> +	struct dentry *dentry, *prev_dentry;
> +	__u32 pid, matches = 0;
> +	int i, ret;
> +
> +	pid = bpf_get_current_pid_tgid() >> 32;
> +	if (pid != monitored_pid)
> +		return 0;
> +
> +	bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
> +
> +	dentry = bpf_file_dentry(f);
> +
> +	for (i = 0; i < 10; i++) {
> +		ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
> +		if (ret == sizeof(expected_value) &&
> +		    !bpf_strncmp(value, ret, expected_value))
> +			matches++;
> +
> +		prev_dentry = dentry;
> +		dentry = bpf_dget_parent(prev_dentry);

Why do you need to walk upwards and instead of reading the xattr values
during security_inode_permission()?

> +		bpf_dput(prev_dentry);
> +	}
> +
> +	bpf_dput(dentry);
> +	number_of_xattr_found = matches;
> +	return 0;
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
> index 7eb2a4e5a3e5..3b0dc6106ca5 100644
> --- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
> +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
> @@ -9,7 +9,8 @@
>  char _license[] SEC("license") = "GPL";
>  
>  __u32 monitored_pid;
> -__u32 found_xattr;
> +__u32 found_xattr_from_file;
> +__u32 found_xattr_from_dentry;
>  
>  static const char expected_value[] = "hello";
>  char value[32];
> @@ -18,6 +19,7 @@ SEC("lsm.s/file_open")
>  int BPF_PROG(test_file_open, struct file *f)
>  {
>  	struct bpf_dynptr value_ptr;
> +	struct dentry *dentry;
>  	__u32 pid;
>  	int ret;
>  
> @@ -32,6 +34,16 @@ int BPF_PROG(test_file_open, struct file *f)
>  		return 0;
>  	if (bpf_strncmp(value, ret, expected_value))
>  		return 0;
> -	found_xattr = 1;
> +	found_xattr_from_file = 1;
> +
> +	dentry = bpf_file_dentry(f);
> +	ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
> +	bpf_dput(dentry);
> +	if (ret != sizeof(expected_value))
> +		return 0;
> +	if (bpf_strncmp(value, ret, expected_value))
> +		return 0;
> +	found_xattr_from_dentry = 1;
> +
>  	return 0;
>  }
> -- 
> 2.43.0
>
Song Liu July 26, 2024, 9:19 a.m. UTC | #2
Hi Christian, 

> On Jul 26, 2024, at 12:06 AM, Christian Brauner <brauner@kernel.org> wrote:

[...]

>> +
>> + for (i = 0; i < 10; i++) {
>> + ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
>> + if (ret == sizeof(expected_value) &&
>> +    !bpf_strncmp(value, ret, expected_value))
>> + matches++;
>> +
>> + prev_dentry = dentry;
>> + dentry = bpf_dget_parent(prev_dentry);
> 
> Why do you need to walk upwards and instead of reading the xattr values
> during security_inode_permission()?

In this use case, we would like to add xattr to the directory to cover
all files under it. For example, assume we have the following xattrs:

  /bin  xattr: user.policy_A = value_A
  /bin/gcc-6.9/ xattr: user.policy_A = value_B
  /bin/gcc-6.9/gcc xattr: user.policy_A = value_C

/bin/gcc-6.9/gcc will use value_C;
/bin/gcc-6.9/<other_files> will use value_B;
/bin/<other_folder_or_file> will use value_A;

By walking upwards from security_file_open(), we can finish the logic 
in a single LSM hook:

    repeat:
        if (dentry have user.policy_A) {
            /* make decision based on value */;
        } else {
            dentry = bpf_dget_parent();
            goto repeat;
        }

Does this make sense? Or maybe I misunderstood the suggestion?

Also, we don't have a bpf_get_inode_xattr() yet. I guess we will need
it for the security_inode_permission approach. If we agree that's a 
better approach, I more than happy to implement it that way. In fact,
I think we will eventually need both bpf_get_inode_xattr() and 
bpf_get_dentry_xattr(). 

Thanks,
Song


>> + bpf_dput(prev_dentry);
>> + }
>> +
Christian Brauner July 26, 2024, 11:51 a.m. UTC | #3
On Fri, Jul 26, 2024 at 09:19:54AM GMT, Song Liu wrote:
> Hi Christian, 
> 
> > On Jul 26, 2024, at 12:06 AM, Christian Brauner <brauner@kernel.org> wrote:
> 
> [...]
> 
> >> +
> >> + for (i = 0; i < 10; i++) {
> >> + ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
> >> + if (ret == sizeof(expected_value) &&
> >> +    !bpf_strncmp(value, ret, expected_value))
> >> + matches++;
> >> +
> >> + prev_dentry = dentry;
> >> + dentry = bpf_dget_parent(prev_dentry);
> > 
> > Why do you need to walk upwards and instead of reading the xattr values
> > during security_inode_permission()?
> 
> In this use case, we would like to add xattr to the directory to cover
> all files under it. For example, assume we have the following xattrs:
> 
>   /bin  xattr: user.policy_A = value_A
>   /bin/gcc-6.9/ xattr: user.policy_A = value_B
>   /bin/gcc-6.9/gcc xattr: user.policy_A = value_C
> 
> /bin/gcc-6.9/gcc will use value_C;
> /bin/gcc-6.9/<other_files> will use value_B;
> /bin/<other_folder_or_file> will use value_A;
> 
> By walking upwards from security_file_open(), we can finish the logic 
> in a single LSM hook:
> 
>     repeat:
>         if (dentry have user.policy_A) {
>             /* make decision based on value */;
>         } else {
>             dentry = bpf_dget_parent();
>             goto repeat;
>         }
> 
> Does this make sense? Or maybe I misunderstood the suggestion?

Imho, what you're doing belongs into inode_permission() not into
security_file_open(). That's already too late and it's somewhat clear
from the example you're using that you're essentially doing permission
checking during path lookup.

Btw, what you're doing is potentially very heavy-handed because you're
retrieving xattrs for which no VFS cache exists so you might end up
causing a lot of io.

Say you have a 10000 deep directory hierarchy and you open a
file_at_level_10000. With that dget_parent() logic in the worst case you
end up walking up the whole hierarchy reading xattr values from disk
10000 times. You can achieve the same result and cleaner if you do the
checking in inode_permission() where it belongs and you only cause all
of that pain once and you abort path lookup correctly.

Also, I'm not even sure this is always correct because you're
retroactively checking what policy to apply based on the xattr value
walking up the parent chain. But a rename could happen and then the
ancestor chain you're checking is different from the current chain or
there's a bunch of mounts along the way.

Imho, that dget_parent() thing just encourages very badly written bpf
LSM programs. That's certainly not an interface we want to expose.

> Also, we don't have a bpf_get_inode_xattr() yet. I guess we will need
> it for the security_inode_permission approach. If we agree that's a 

Yes, that's fine.

You also need to ensure that you're only reading user.* xattrs. I know
you already do that for bpf_get_file_xattr() but this helper needs the
same treatment.

And you need to force a drop-out of RCU path lookup btw because you're
almost definitely going to block when you check the xattr.

> better approach, I more than happy to implement it that way. In fact,
> I think we will eventually need both bpf_get_inode_xattr() and 
> bpf_get_dentry_xattr(). 

I'm not sure about that because it's royally annoying in the first place
that we have to dentry and inode separately in the xattr handlers
because LSMs sometimes call them from a location when the dentry and
inode aren't yet fused together. The dentry is the wrong data structure
to care about here.
Song Liu July 26, 2024, 7:43 p.m. UTC | #4
Hi Christian, 

Thanks a lot for your comments.

> On Jul 26, 2024, at 4:51 AM, Christian Brauner <brauner@kernel.org> wrote:
> 
> On Fri, Jul 26, 2024 at 09:19:54AM GMT, Song Liu wrote:
>> Hi Christian, 
>> 
>>> On Jul 26, 2024, at 12:06 AM, Christian Brauner <brauner@kernel.org> wrote:
>> 
>> [...]
>> 
>>>> +
>>>> + for (i = 0; i < 10; i++) {
>>>> + ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
>>>> + if (ret == sizeof(expected_value) &&
>>>> +    !bpf_strncmp(value, ret, expected_value))
>>>> + matches++;
>>>> +
>>>> + prev_dentry = dentry;
>>>> + dentry = bpf_dget_parent(prev_dentry);
>>> 
>>> Why do you need to walk upwards and instead of reading the xattr values
>>> during security_inode_permission()?
>> 
>> In this use case, we would like to add xattr to the directory to cover
>> all files under it. For example, assume we have the following xattrs:
>> 
>>  /bin  xattr: user.policy_A = value_A
>>  /bin/gcc-6.9/ xattr: user.policy_A = value_B
>>  /bin/gcc-6.9/gcc xattr: user.policy_A = value_C
>> 
>> /bin/gcc-6.9/gcc will use value_C;
>> /bin/gcc-6.9/<other_files> will use value_B;
>> /bin/<other_folder_or_file> will use value_A;
>> 
>> By walking upwards from security_file_open(), we can finish the logic 
>> in a single LSM hook:
>> 
>>    repeat:
>>        if (dentry have user.policy_A) {
>>            /* make decision based on value */;
>>        } else {
>>            dentry = bpf_dget_parent();
>>            goto repeat;
>>        }
>> 
>> Does this make sense? Or maybe I misunderstood the suggestion?
> 
> Imho, what you're doing belongs into inode_permission() not into
> security_file_open(). That's already too late and it's somewhat clear
> from the example you're using that you're essentially doing permission
> checking during path lookup.

I am not sure I follow the suggestion to implement this with 
security_inode_permission()? Could you please share more details about
this idea?

> Btw, what you're doing is potentially very heavy-handed because you're
> retrieving xattrs for which no VFS cache exists so you might end up
> causing a lot of io.
> 
> Say you have a 10000 deep directory hierarchy and you open a
> file_at_level_10000. With that dget_parent() logic in the worst case you
> end up walking up the whole hierarchy reading xattr values from disk
> 10000 times. You can achieve the same result and cleaner if you do the
> checking in inode_permission() where it belongs and you only cause all
> of that pain once and you abort path lookup correctly.

Yes, we need the BPF program to limit the number of parents to walk. 

> Also, I'm not even sure this is always correct because you're
> retroactively checking what policy to apply based on the xattr value
> walking up the parent chain. But a rename could happen and then the
> ancestor chain you're checking is different from the current chain or
> there's a bunch of mounts along the way. 
> 
> Imho, that dget_parent() thing just encourages very badly written bpf
> LSM programs. That's certainly not an interface we want to expose.
> 
>> Also, we don't have a bpf_get_inode_xattr() yet. I guess we will need
>> it for the security_inode_permission approach. If we agree that's a
> 
> Yes, that's fine.
> 
> You also need to ensure that you're only reading user.* xattrs. I know
> you already do that for bpf_get_file_xattr() but this helper needs the
> same treatment.

Sounds good. bpf_get_inode_xattr() would be a very useful kfunc. Let's
add that. 

> 
> And you need to force a drop-out of RCU path lookup btw because you're
> almost definitely going to block when you check the xattr.

We only allow xattr look up from sleepable context. 

> 
>> better approach, I more than happy to implement it that way. In fact,
>> I think we will eventually need both bpf_get_inode_xattr() and 
>> bpf_get_dentry_xattr().
> 
> I'm not sure about that because it's royally annoying in the first place
> that we have to dentry and inode separately in the xattr handlers
> because LSMs sometimes call them from a location when the dentry and
> inode aren't yet fused together. The dentry is the wrong data structure
> to care about here.

Thanks,
Song
Christian Brauner July 29, 2024, 1:46 p.m. UTC | #5
On Fri, Jul 26, 2024 at 07:43:28PM GMT, Song Liu wrote:
> Hi Christian, 
> 
> Thanks a lot for your comments.
> 
> > On Jul 26, 2024, at 4:51 AM, Christian Brauner <brauner@kernel.org> wrote:
> > 
> > On Fri, Jul 26, 2024 at 09:19:54AM GMT, Song Liu wrote:
> >> Hi Christian, 
> >> 
> >>> On Jul 26, 2024, at 12:06 AM, Christian Brauner <brauner@kernel.org> wrote:
> >> 
> >> [...]
> >> 
> >>>> +
> >>>> + for (i = 0; i < 10; i++) {
> >>>> + ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
> >>>> + if (ret == sizeof(expected_value) &&
> >>>> +    !bpf_strncmp(value, ret, expected_value))
> >>>> + matches++;
> >>>> +
> >>>> + prev_dentry = dentry;
> >>>> + dentry = bpf_dget_parent(prev_dentry);
> >>> 
> >>> Why do you need to walk upwards and instead of reading the xattr values
> >>> during security_inode_permission()?
> >> 
> >> In this use case, we would like to add xattr to the directory to cover
> >> all files under it. For example, assume we have the following xattrs:
> >> 
> >>  /bin  xattr: user.policy_A = value_A
> >>  /bin/gcc-6.9/ xattr: user.policy_A = value_B
> >>  /bin/gcc-6.9/gcc xattr: user.policy_A = value_C
> >> 
> >> /bin/gcc-6.9/gcc will use value_C;
> >> /bin/gcc-6.9/<other_files> will use value_B;
> >> /bin/<other_folder_or_file> will use value_A;
> >> 
> >> By walking upwards from security_file_open(), we can finish the logic 
> >> in a single LSM hook:
> >> 
> >>    repeat:
> >>        if (dentry have user.policy_A) {
> >>            /* make decision based on value */;
> >>        } else {
> >>            dentry = bpf_dget_parent();
> >>            goto repeat;
> >>        }
> >> 
> >> Does this make sense? Or maybe I misunderstood the suggestion?
> > 
> > Imho, what you're doing belongs into inode_permission() not into
> > security_file_open(). That's already too late and it's somewhat clear
> > from the example you're using that you're essentially doing permission
> > checking during path lookup.
> 
> I am not sure I follow the suggestion to implement this with 
> security_inode_permission()? Could you please share more details about
> this idea?

Given a path like /bin/gcc-6.9/gcc what that code currently does is:

* walk down to /bin/gcc-6.9/gcc
* walk up from /bin/gcc-6.9/gcc and then checking xattr labels for:
  gcc
  gcc-6.9/
  bin/
  /

That's broken because someone could've done
mv /bin/gcc-6.9/gcc /attack/ and when this walks back and it checks xattrs on
/attack even though the path lookup was for /bin/gcc-6.9. IOW, the
security_file_open() checks have nothing to do with the permission checks that
were done during path lookup.

Why isn't that logic:

* walk down to /bin/gcc-6.9/gcc and check for each component:

  security_inode_permission(/)
  security_inode_permission(gcc-6.9/)
  security_inode_permission(bin/)
  security_inode_permission(gcc)
  security_file_open(gcc)

I think that dget_parent() logic also wouldn't make sense for relative path
lookups:

	dfd = open("/bin/gcc-6.9", O_RDONLY | O_DIRECTORY | O_CLOEXEC);

This walks down to /bin/gcc-6.9 and then walks back up (subject to the
same problem mentioned earlier) and check xattrs for:

  gcc-6.9
  bin/
  /

then that dfd is passed to openat() to open "gcc":

fd = openat(dfd, "gcc", O_RDONLY);

which again walks up to /bin/gcc-6.9 and checks xattrs for:
  gcc
  gcc-6.9
  bin/
  /

Which means this code ends up charging relative lookups twice. Even if one
irons that out in the program this encourages really bad patterns.
Path lookup is iterative top down. One can't just randomly walk back up and
assume that's equivalent.
Song Liu July 30, 2024, 5:58 a.m. UTC | #6
Hi Christian, 

Thanks a lot for your detailed explanation! We will revisit the design 
based on these comments and suggestions. 

One more question about a potential new kfunc bpf_get_inode_xattr(): 
Should it take dentry as input? IOW, should it look like:

__bpf_kfunc int bpf_get_inode_xattr(struct dentry *dentry, const char *name__str,
                                    struct bpf_dynptr *value_p)
{
        struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
        u32 value_len;
        void *value;
        int ret;

        if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
                return -EPERM;

        value_len = __bpf_dynptr_size(value_ptr);
        value = __bpf_dynptr_data_rw(value_ptr, value_len);
        if (!value)
                return -EINVAL;

        ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
        if (ret)
                return ret;
        return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
}


I am asking because many security_inode_* hooks actually taking dentry as 
argument. So it makes sense to use dentry for kfuncs. Maybe we should
call it bpf_get_dentry_xattr, which is actually the same kfunc in this
set (1/2)?

Thanks,
Song



> On Jul 29, 2024, at 6:46 AM, Christian Brauner <brauner@kernel.org> wrote:
[...]
>>> Imho, what you're doing belongs into inode_permission() not into
>>> security_file_open(). That's already too late and it's somewhat clear
>>> from the example you're using that you're essentially doing permission
>>> checking during path lookup.
>> 
>> I am not sure I follow the suggestion to implement this with 
>> security_inode_permission()? Could you please share more details about
>> this idea?

[...]
Christian Brauner July 30, 2024, 8:59 a.m. UTC | #7
On Tue, Jul 30, 2024 at 05:58:31AM GMT, Song Liu wrote:
> Hi Christian, 
> 
> Thanks a lot for your detailed explanation! We will revisit the design 
> based on these comments and suggestions. 
> 
> One more question about a potential new kfunc bpf_get_inode_xattr(): 
> Should it take dentry as input? IOW, should it look like:
> 
> __bpf_kfunc int bpf_get_inode_xattr(struct dentry *dentry, const char *name__str,
>                                     struct bpf_dynptr *value_p)
> {
>         struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
>         u32 value_len;
>         void *value;
>         int ret;
> 
>         if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
>                 return -EPERM;
> 
>         value_len = __bpf_dynptr_size(value_ptr);
>         value = __bpf_dynptr_data_rw(value_ptr, value_len);
>         if (!value)
>                 return -EINVAL;
> 
>         ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
>         if (ret)
>                 return ret;
>         return __vfs_getxattr(dentry, inode, name__str, value, value_len);
> }
> 
> 
> I am asking because many security_inode_* hooks actually taking dentry as 
> argument. So it makes sense to use dentry for kfuncs. Maybe we should

Some filesystems (i) require access to the @dentry in their xattr
handlers (e.g. 9p) and (ii) ->get() and ->set() xattr handlers can be
called when @inode hasn't been attached to @dentry yet.

So if you allowed to call bpf_get_*_xattr() from
security_d_instantiate() to somehow retrieve xattrs from there, then you
need to pass @dentry and @inode separately and you cannot use
@dentry->d_inode because it would still be NULL.

However, I doubt you'd call bpf_get_*_xattr() from
security_d_instantiate() so imo just pass the dentry and add a check
like:

struct inode *inode = d_inode(dentry);
if (WARN_ON(!inode))
	return -EINVAL;

in there.
Song Liu Aug. 19, 2024, 7:18 a.m. UTC | #8
Hi Christian, 

Thanks again for your suggestions here. I have got more questions on
this work. 

> On Jul 29, 2024, at 6:46 AM, Christian Brauner <brauner@kernel.org> wrote:

[...]

>> I am not sure I follow the suggestion to implement this with 
>> security_inode_permission()? Could you please share more details about
>> this idea?
> 
> Given a path like /bin/gcc-6.9/gcc what that code currently does is:
> 
> * walk down to /bin/gcc-6.9/gcc
> * walk up from /bin/gcc-6.9/gcc and then checking xattr labels for:
>  gcc
>  gcc-6.9/
>  bin/
>  /
> 
> That's broken because someone could've done
> mv /bin/gcc-6.9/gcc /attack/ and when this walks back and it checks xattrs on
> /attack even though the path lookup was for /bin/gcc-6.9. IOW, the
> security_file_open() checks have nothing to do with the permission checks that
> were done during path lookup.
> 
> Why isn't that logic:
> 
> * walk down to /bin/gcc-6.9/gcc and check for each component:
> 
>  security_inode_permission(/)
>  security_inode_permission(gcc-6.9/)
>  security_inode_permission(bin/)
>  security_inode_permission(gcc)
>  security_file_open(gcc)

I am trying to implement this approach. The idea, IIUC, is:

1. For each open/openat, as we walk the path in do_filp_open=>path_openat, 
   check xattr for "/", "gcc-6.9/", "bin/" for all given flags.
2. Save the value of the flag somewhere (for BPF, we can use inode local
   storage). This is needed, because openat(dfd, ..) will not start from
   root again. 
3. Propagate these flag to children. All the above are done at 
   security_inode_permission. 
4. Finally, at security_file_open, check the xattr with the file, which 
   is probably propagated from some parents.

Did I get this right? 

IIUC, there are a few issues with this approach. 

1. security_inode_permission takes inode as parameter. However, we need 
   dentry to get the xattr. Shall we change security_inode_permission
   to take dentry instead? 
   PS: Maybe we should change most/all inode hooks to take dentry instead?

2. There is no easy way to propagate data from parent. Assuming we already
   changed security_inode_permission to take dentry, we still need some
   mechanism to look up xattr from the parent, which is probably still 
   something like bpf_dget_parent(). Or maybe we should add another hook 
   with both parent and child dentry as input?

3. Given we save the flag from parents in children's inode local storage, 
   we may consume non-trivial extra memory. BPF inode local storage will 
   be freed as the inode gets freed, so we will not leak any memory or 
   overflow some hash map. However, this will probably increase the 
   memory consumption of inode by a few percents. I think a "walk-up" 
   based approach will not have this problem, as we don't need the extra
   storage. Of course, this means more xattr lookups in some cases. 

> 
> I think that dget_parent() logic also wouldn't make sense for relative path
> lookups:
> 
> dfd = open("/bin/gcc-6.9", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
> 
> This walks down to /bin/gcc-6.9 and then walks back up (subject to the
> same problem mentioned earlier) and check xattrs for:
> 
>  gcc-6.9
>  bin/
>  /
> 
> then that dfd is passed to openat() to open "gcc":
> 
> fd = openat(dfd, "gcc", O_RDONLY);
> 
> which again walks up to /bin/gcc-6.9 and checks xattrs for:
>  gcc
>  gcc-6.9
>  bin/
>  /
> 
> Which means this code ends up charging relative lookups twice. Even if one
> irons that out in the program this encourages really bad patterns.
> Path lookup is iterative top down. One can't just randomly walk back up and
> assume that's equivalent.

I understand that walk-up is not equivalent to walk down. But it is probably
accurate enough for some security policies. For example, LSM LandLock uses
similar logic in the file_open hook (file security/landlock/fs.c, function 
is_access_to_paths_allowed). 

To summary my thoughts here. I think we need:

1. Change security_inode_permission to take dentry instead of inode. 
2. Still add bpf_dget_parent. We will use it with security_inode_permission
   so that we can propagate flags from parents to children. We will need
   a bpf_dput as well. 
3. There are pros and cons with different approaches to implement this
   policy (tags on directory work for all files in it). We probably need 
   the policy writer to decide with one to use. From BPF's POV, dget_parent
   is "safe", because it won't crash the system. It may encourage some bad
   patterns, but it appears to be required in some use cases. 

Does this make sense?

Thanks,
Song
Christian Brauner Aug. 19, 2024, 11:16 a.m. UTC | #9
On Mon, Aug 19, 2024 at 07:18:40AM GMT, Song Liu wrote:
> Hi Christian, 
> 
> Thanks again for your suggestions here. I have got more questions on
> this work. 
> 
> > On Jul 29, 2024, at 6:46 AM, Christian Brauner <brauner@kernel.org> wrote:
> 
> [...]
> 
> >> I am not sure I follow the suggestion to implement this with 
> >> security_inode_permission()? Could you please share more details about
> >> this idea?
> > 
> > Given a path like /bin/gcc-6.9/gcc what that code currently does is:
> > 
> > * walk down to /bin/gcc-6.9/gcc
> > * walk up from /bin/gcc-6.9/gcc and then checking xattr labels for:
> >  gcc
> >  gcc-6.9/
> >  bin/
> >  /
> > 
> > That's broken because someone could've done
> > mv /bin/gcc-6.9/gcc /attack/ and when this walks back and it checks xattrs on
> > /attack even though the path lookup was for /bin/gcc-6.9. IOW, the
> > security_file_open() checks have nothing to do with the permission checks that
> > were done during path lookup.
> > 
> > Why isn't that logic:
> > 
> > * walk down to /bin/gcc-6.9/gcc and check for each component:
> > 
> >  security_inode_permission(/)
> >  security_inode_permission(gcc-6.9/)
> >  security_inode_permission(bin/)
> >  security_inode_permission(gcc)
> >  security_file_open(gcc)
> 
> I am trying to implement this approach. The idea, IIUC, is:
> 
> 1. For each open/openat, as we walk the path in do_filp_open=>path_openat, 
>    check xattr for "/", "gcc-6.9/", "bin/" for all given flags.
> 2. Save the value of the flag somewhere (for BPF, we can use inode local
>    storage). This is needed, because openat(dfd, ..) will not start from
>    root again. 
> 3. Propagate these flag to children. All the above are done at 
>    security_inode_permission. 
> 4. Finally, at security_file_open, check the xattr with the file, which 
>    is probably propagated from some parents.
> 
> Did I get this right? 
> 
> IIUC, there are a few issues with this approach. 
> 
> 1. security_inode_permission takes inode as parameter. However, we need 
>    dentry to get the xattr. Shall we change security_inode_permission
>    to take dentry instead? 
>    PS: Maybe we should change most/all inode hooks to take dentry instead?

security_inode_permission() is called in generic_permission() which in
turn is called from inode_permission() which in turn is called from
inode->i_op->permission() for various filesystems. So to make
security_inode_permission() take a dentry argument one would need to
change all inode->i_op->permission() to take a dentry argument for all
filesystems. NAK on that.

That's ignoring that it's just plain wrong to pass a dentry to
**inode**_permission() or security_**inode**_permission(). It's
permissions on the inode, not the dentry.

> 
> 2. There is no easy way to propagate data from parent. Assuming we already
>    changed security_inode_permission to take dentry, we still need some
>    mechanism to look up xattr from the parent, which is probably still 
>    something like bpf_dget_parent(). Or maybe we should add another hook 
>    with both parent and child dentry as input?
> 
> 3. Given we save the flag from parents in children's inode local storage, 
>    we may consume non-trivial extra memory. BPF inode local storage will 
>    be freed as the inode gets freed, so we will not leak any memory or 
>    overflow some hash map. However, this will probably increase the 
>    memory consumption of inode by a few percents. I think a "walk-up" 
>    based approach will not have this problem, as we don't need the extra
>    storage. Of course, this means more xattr lookups in some cases. 
> 
> > 
> > I think that dget_parent() logic also wouldn't make sense for relative path
> > lookups:
> > 
> > dfd = open("/bin/gcc-6.9", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
> > 
> > This walks down to /bin/gcc-6.9 and then walks back up (subject to the
> > same problem mentioned earlier) and check xattrs for:
> > 
> >  gcc-6.9
> >  bin/
> >  /
> > 
> > then that dfd is passed to openat() to open "gcc":
> > 
> > fd = openat(dfd, "gcc", O_RDONLY);
> > 
> > which again walks up to /bin/gcc-6.9 and checks xattrs for:
> >  gcc
> >  gcc-6.9
> >  bin/
> >  /
> > 
> > Which means this code ends up charging relative lookups twice. Even if one
> > irons that out in the program this encourages really bad patterns.
> > Path lookup is iterative top down. One can't just randomly walk back up and
> > assume that's equivalent.
> 
> I understand that walk-up is not equivalent to walk down. But it is probably
> accurate enough for some security policies. For example, LSM LandLock uses
> similar logic in the file_open hook (file security/landlock/fs.c, function 
> is_access_to_paths_allowed). 

I'm not well-versed in landlock so I'll let Mickaël comment on this with
more details but there's very important restrictions and differences
here.

Landlock expresses security policies with file hierarchies and
security_inode_permission() doesn't and cannot have access to that.

Landlock is subject to the same problem that the BPF is here. Namely
that the VFS permission checking could have been done on a path walk
completely different from the path walk that is checked when walking
back up from security_file_open().

But because landlock works with a deny-by-default security policy this
is ok and it takes overmounts into account etc.

> 
> To summary my thoughts here. I think we need:
> 
> 1. Change security_inode_permission to take dentry instead of inode. 

Sorry, no.

> 2. Still add bpf_dget_parent. We will use it with security_inode_permission
>    so that we can propagate flags from parents to children. We will need
>    a bpf_dput as well. 
> 3. There are pros and cons with different approaches to implement this
>    policy (tags on directory work for all files in it). We probably need 
>    the policy writer to decide with one to use. From BPF's POV, dget_parent
>    is "safe", because it won't crash the system. It may encourage some bad
>    patterns, but it appears to be required in some use cases. 

You cannot just walk a path upwards and check permissions and assume
that this is safe unless you have a clear idea what makes it safe in
this scenario. Landlock has afaict. But so far you only have a vague
sketch of checking permissions walking upwards and retrieving xattrs
without any notion of the problems involved.

If you provide a bpf_get_parent() api for userspace to consume you'll
end up providing them with an api that is extremly easy to misuse.
Mickaël Salaün Aug. 19, 2024, 1:12 p.m. UTC | #10
On Mon, Aug 19, 2024 at 01:16:04PM +0200, Christian Brauner wrote:
> On Mon, Aug 19, 2024 at 07:18:40AM GMT, Song Liu wrote:
> > Hi Christian, 
> > 
> > Thanks again for your suggestions here. I have got more questions on
> > this work. 
> > 
> > > On Jul 29, 2024, at 6:46 AM, Christian Brauner <brauner@kernel.org> wrote:
> > 
> > [...]
> > 
> > >> I am not sure I follow the suggestion to implement this with 
> > >> security_inode_permission()? Could you please share more details about
> > >> this idea?
> > > 
> > > Given a path like /bin/gcc-6.9/gcc what that code currently does is:
> > > 
> > > * walk down to /bin/gcc-6.9/gcc
> > > * walk up from /bin/gcc-6.9/gcc and then checking xattr labels for:
> > >  gcc
> > >  gcc-6.9/
> > >  bin/
> > >  /
> > > 
> > > That's broken because someone could've done
> > > mv /bin/gcc-6.9/gcc /attack/ and when this walks back and it checks xattrs on
> > > /attack even though the path lookup was for /bin/gcc-6.9. IOW, the
> > > security_file_open() checks have nothing to do with the permission checks that
> > > were done during path lookup.
> > > 
> > > Why isn't that logic:
> > > 
> > > * walk down to /bin/gcc-6.9/gcc and check for each component:
> > > 
> > >  security_inode_permission(/)
> > >  security_inode_permission(gcc-6.9/)
> > >  security_inode_permission(bin/)
> > >  security_inode_permission(gcc)
> > >  security_file_open(gcc)
> > 
> > I am trying to implement this approach. The idea, IIUC, is:
> > 
> > 1. For each open/openat, as we walk the path in do_filp_open=>path_openat, 
> >    check xattr for "/", "gcc-6.9/", "bin/" for all given flags.
> > 2. Save the value of the flag somewhere (for BPF, we can use inode local
> >    storage). This is needed, because openat(dfd, ..) will not start from
> >    root again. 
> > 3. Propagate these flag to children. All the above are done at 
> >    security_inode_permission. 
> > 4. Finally, at security_file_open, check the xattr with the file, which 
> >    is probably propagated from some parents.
> > 
> > Did I get this right? 
> > 
> > IIUC, there are a few issues with this approach. 
> > 
> > 1. security_inode_permission takes inode as parameter. However, we need 
> >    dentry to get the xattr. Shall we change security_inode_permission
> >    to take dentry instead? 
> >    PS: Maybe we should change most/all inode hooks to take dentry instead?
> 
> security_inode_permission() is called in generic_permission() which in
> turn is called from inode_permission() which in turn is called from
> inode->i_op->permission() for various filesystems. So to make
> security_inode_permission() take a dentry argument one would need to
> change all inode->i_op->permission() to take a dentry argument for all
> filesystems. NAK on that.
> 
> That's ignoring that it's just plain wrong to pass a dentry to
> **inode**_permission() or security_**inode**_permission(). It's
> permissions on the inode, not the dentry.
> 
> > 
> > 2. There is no easy way to propagate data from parent. Assuming we already
> >    changed security_inode_permission to take dentry, we still need some
> >    mechanism to look up xattr from the parent, which is probably still 
> >    something like bpf_dget_parent(). Or maybe we should add another hook 
> >    with both parent and child dentry as input?
> > 
> > 3. Given we save the flag from parents in children's inode local storage, 
> >    we may consume non-trivial extra memory. BPF inode local storage will 
> >    be freed as the inode gets freed, so we will not leak any memory or 
> >    overflow some hash map. However, this will probably increase the 
> >    memory consumption of inode by a few percents. I think a "walk-up" 
> >    based approach will not have this problem, as we don't need the extra
> >    storage. Of course, this means more xattr lookups in some cases. 
> > 
> > > 
> > > I think that dget_parent() logic also wouldn't make sense for relative path
> > > lookups:
> > > 
> > > dfd = open("/bin/gcc-6.9", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
> > > 
> > > This walks down to /bin/gcc-6.9 and then walks back up (subject to the
> > > same problem mentioned earlier) and check xattrs for:
> > > 
> > >  gcc-6.9
> > >  bin/
> > >  /
> > > 
> > > then that dfd is passed to openat() to open "gcc":
> > > 
> > > fd = openat(dfd, "gcc", O_RDONLY);
> > > 
> > > which again walks up to /bin/gcc-6.9 and checks xattrs for:
> > >  gcc
> > >  gcc-6.9
> > >  bin/
> > >  /
> > > 
> > > Which means this code ends up charging relative lookups twice. Even if one
> > > irons that out in the program this encourages really bad patterns.
> > > Path lookup is iterative top down. One can't just randomly walk back up and
> > > assume that's equivalent.
> > 
> > I understand that walk-up is not equivalent to walk down. But it is probably
> > accurate enough for some security policies. For example, LSM LandLock uses
> > similar logic in the file_open hook (file security/landlock/fs.c, function 
> > is_access_to_paths_allowed). 
> 
> I'm not well-versed in landlock so I'll let Mickaël comment on this with
> more details but there's very important restrictions and differences
> here.
> 
> Landlock expresses security policies with file hierarchies and
> security_inode_permission() doesn't and cannot have access to that.
> 
> Landlock is subject to the same problem that the BPF is here. Namely
> that the VFS permission checking could have been done on a path walk
> completely different from the path walk that is checked when walking
> back up from security_file_open().
> 
> But because landlock works with a deny-by-default security policy this
> is ok and it takes overmounts into account etc.

Correct. Another point is that Landlock uses the file's path (i.e.
dentry + mnt) to walk down to the parent.  Only using the dentry would
be incorrect for most use cases (i.e. any system with more than one
mount point).

> 
> > 
> > To summary my thoughts here. I think we need:
> > 
> > 1. Change security_inode_permission to take dentry instead of inode. 
> 
> Sorry, no.
> 
> > 2. Still add bpf_dget_parent. We will use it with security_inode_permission
> >    so that we can propagate flags from parents to children. We will need
> >    a bpf_dput as well. 
> > 3. There are pros and cons with different approaches to implement this
> >    policy (tags on directory work for all files in it). We probably need 
> >    the policy writer to decide with one to use. From BPF's POV, dget_parent
> >    is "safe", because it won't crash the system. It may encourage some bad
> >    patterns, but it appears to be required in some use cases. 
> 
> You cannot just walk a path upwards and check permissions and assume
> that this is safe unless you have a clear idea what makes it safe in
> this scenario. Landlock has afaict. But so far you only have a vague
> sketch of checking permissions walking upwards and retrieving xattrs
> without any notion of the problems involved.

Something to keep in mind is that relying on xattr to label files
requires to deny sanboxed processes to change this xattr, otherwise it
would be trivial to bypass such a sandbox.  Sandboxing must be though as
a whole and Landlock's design for file system access control takes into
account all kind of file system operations that could bypass a sandbox
policy (e.g. mount operations), and also protects from impersonations.

What is the use case for this patch series?  Couldn't Landlock be used
for that?

> 
> If you provide a bpf_get_parent() api for userspace to consume you'll
> end up providing them with an api that is extremly easy to misuse.
Song Liu Aug. 19, 2024, 8:25 p.m. UTC | #11
Hi Christian, 

> On Aug 19, 2024, at 4:16 AM, Christian Brauner <brauner@kernel.org> wrote:

[...]

>> Did I get this right? 
>> 
>> IIUC, there are a few issues with this approach. 
>> 
>> 1. security_inode_permission takes inode as parameter. However, we need 
>>   dentry to get the xattr. Shall we change security_inode_permission
>>   to take dentry instead? 
>>   PS: Maybe we should change most/all inode hooks to take dentry instead?
> 
> security_inode_permission() is called in generic_permission() which in
> turn is called from inode_permission() which in turn is called from
> inode->i_op->permission() for various filesystems. So to make
> security_inode_permission() take a dentry argument one would need to
> change all inode->i_op->permission() to take a dentry argument for all
> filesystems. NAK on that.
> 
> That's ignoring that it's just plain wrong to pass a dentry to
> **inode**_permission() or security_**inode**_permission(). It's
> permissions on the inode, not the dentry.

Agreed. 

[...]

>>> 
>>> Which means this code ends up charging relative lookups twice. Even if one
>>> irons that out in the program this encourages really bad patterns.
>>> Path lookup is iterative top down. One can't just randomly walk back up and
>>> assume that's equivalent.
>> 
>> I understand that walk-up is not equivalent to walk down. But it is probably
>> accurate enough for some security policies. For example, LSM LandLock uses
>> similar logic in the file_open hook (file security/landlock/fs.c, function 
>> is_access_to_paths_allowed).
> 
> I'm not well-versed in landlock so I'll let Mickaël comment on this with
> more details but there's very important restrictions and differences
> here.
> 
> Landlock expresses security policies with file hierarchies and
> security_inode_permission() doesn't and cannot have access to that.
> 
> Landlock is subject to the same problem that the BPF is here. Namely
> that the VFS permission checking could have been done on a path walk
> completely different from the path walk that is checked when walking
> back up from security_file_open().
> 
> But because landlock works with a deny-by-default security policy this
> is ok and it takes overmounts into account etc.
> 
>> 
>> To summary my thoughts here. I think we need:
>> 
>> 1. Change security_inode_permission to take dentry instead of inode.
> 
> Sorry, no.
> 
>> 2. Still add bpf_dget_parent. We will use it with security_inode_permission
>>   so that we can propagate flags from parents to children. We will need
>>   a bpf_dput as well. 
>> 3. There are pros and cons with different approaches to implement this
>>   policy (tags on directory work for all files in it). We probably need 
>>   the policy writer to decide with one to use. From BPF's POV, dget_parent
>>   is "safe", because it won't crash the system. It may encourage some bad
>>   patterns, but it appears to be required in some use cases.
> 
> You cannot just walk a path upwards and check permissions and assume
> that this is safe unless you have a clear idea what makes it safe in
> this scenario. Landlock has afaict. But so far you only have a vague
> sketch of checking permissions walking upwards and retrieving xattrs
> without any notion of the problems involved.

I am sorry for being vague with the use case here. We are trying to 
cover a few different use cases, such as sandboxing, allowlisting 
certain operations to selected binaries, prevent operation errors, etc. 
For this work, we are looking for the right building blocks to enable
these use cases. 

> If you provide a bpf_get_parent() api for userspace to consume you'll
> end up providing them with an api that is extremly easy to misuse.

Does this make sense to have higher level API that walks up the path, 
so that it takes mounts into account. It can probably be something like:

int bpf_get_parent_path(struct path *p) {
again:
    if (p->dentry == p->mnt.mnt_root) {
        follow_up(p);
        goto again;
    }
    if (unlikely(IS_ROOT(p->dentry))) {
        return PARENT_WALK_DONE;  
    }
    parent_dentry = dget_parent(p->dentry);
    dput(p->dentry);
    p->dentry = parent_dentry;
    return PARENT_WALK_NEXT; 
}

This will handle the mount. However, we cannot guarantee deny-by-default
policies like LandLock does, because this is just a building block of 
some security policies. 

Is this something we can give a try with?

Thanks,
Song
Song Liu Aug. 19, 2024, 8:35 p.m. UTC | #12
Hi Mickaël, 

> On Aug 19, 2024, at 6:12 AM, Mickaël Salaün <mic@digikod.net> wrote:

[...]

>> But because landlock works with a deny-by-default security policy this
>> is ok and it takes overmounts into account etc.
> 
> Correct. Another point is that Landlock uses the file's path (i.e.
> dentry + mnt) to walk down to the parent.  Only using the dentry would
> be incorrect for most use cases (i.e. any system with more than one
> mount point).

Thanks for highlighting the difference. Let me see whether we can bridge
the gap for this set. 

[...]

>>> 
>>> 1. Change security_inode_permission to take dentry instead of inode.
>> 
>> Sorry, no.
>> 
>>> 2. Still add bpf_dget_parent. We will use it with security_inode_permission
>>>   so that we can propagate flags from parents to children. We will need
>>>   a bpf_dput as well. 
>>> 3. There are pros and cons with different approaches to implement this
>>>   policy (tags on directory work for all files in it). We probably need 
>>>   the policy writer to decide with one to use. From BPF's POV, dget_parent
>>>   is "safe", because it won't crash the system. It may encourage some bad
>>>   patterns, but it appears to be required in some use cases.
>> 
>> You cannot just walk a path upwards and check permissions and assume
>> that this is safe unless you have a clear idea what makes it safe in
>> this scenario. Landlock has afaict. But so far you only have a vague
>> sketch of checking permissions walking upwards and retrieving xattrs
>> without any notion of the problems involved.
> 
> Something to keep in mind is that relying on xattr to label files
> requires to deny sanboxed processes to change this xattr, otherwise it
> would be trivial to bypass such a sandbox.  Sandboxing must be though as
> a whole and Landlock's design for file system access control takes into
> account all kind of file system operations that could bypass a sandbox
> policy (e.g. mount operations), and also protects from impersonations.

Thanks for sharing these experiences! 

> What is the use case for this patch series?  Couldn't Landlock be used
> for that?

We have multiple use cases. We can use Landlock for some of them. The 
primary goal of this patchset is to add useful building blocks to BPF LSM
so that we can build effective and flexible security policies for various
use cases. These building blocks alone won't be very useful. For example,
as you pointed out, to make xattr labels useful, we need some policies 
for xattr read/write.

Does this make sense?

Thanks,
Song
Song Liu Aug. 20, 2024, 5:42 a.m. UTC | #13
Hi Christian, 

> On Aug 19, 2024, at 1:25 PM, Song Liu <songliubraving@fb.com> wrote:
> 
> Hi Christian, 

[...]

> If you provide a bpf_get_parent() api for userspace to consume you'll
>> end up providing them with an api that is extremly easy to misuse.
> 
> Does this make sense to have higher level API that walks up the path, 
> so that it takes mounts into account. It can probably be something like:
> 
> int bpf_get_parent_path(struct path *p) {
> again:
>    if (p->dentry == p->mnt.mnt_root) {
>        follow_up(p);
>        goto again;
>    }
>    if (unlikely(IS_ROOT(p->dentry))) {
>        return PARENT_WALK_DONE;  
>    }
>    parent_dentry = dget_parent(p->dentry);
>    dput(p->dentry);
>    p->dentry = parent_dentry;
>    return PARENT_WALK_NEXT; 
> }
> 
> This will handle the mount. However, we cannot guarantee deny-by-default
> policies like LandLock does, because this is just a building block of 
> some security policies. 

I guess the above is not really clear. Here is a prototype I got. 
With the kernel diff attached below, we are able to do something
like:

SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
{
	/* ... */

        bpf_for_each(dentry, dentry, &f->f_path, BPF_DENTRY_ITER_TO_ROOT) {
                ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
		/* do work with the xattr in value_ptr */
        }

	/* ... */
}

With this helper, the user cannot walk the tree randomly. Instead, 
the walk has to follow some pattern, namely, TO_ROOT, TO_MNT_ROOT, 
etc. And helper makes sure the walk is safe. 

Does this solution make sense to you? 

Thanks,
Song



The kernel diff below. 
============================== 8< ===============================


diff --git c/fs/bpf_fs_kfuncs.c w/fs/bpf_fs_kfuncs.c
index 3fe9f59ef867..4b1400dec984 100644
--- c/fs/bpf_fs_kfuncs.c
+++ w/fs/bpf_fs_kfuncs.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/mm.h>
+#include <linux/namei.h>
 #include <linux/xattr.h>

 __bpf_kfunc_start_defs();
@@ -154,13 +155,91 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,

 __bpf_kfunc_end_defs();

+struct bpf_iter_dentry {
+       __u64 __opaque[3];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_dentry_kern {
+       struct path path;
+       unsigned int flags;
+} __attribute__((aligned(8)));
+
+enum {
+       /* all the parent paths, until root (/) */
+       BPF_DENTRY_ITER_TO_ROOT,
+       /* all the parent paths, until mnt root */
+       BPF_DENTRY_ITER_TO_MNT_ROOT,
+};
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_dentry_new(struct bpf_iter_dentry *it,
+                                        struct path *path, unsigned int flags)
+{
+       struct bpf_iter_dentry_kern *kit = (void*)it;
+
+       BUILD_BUG_ON(sizeof(struct bpf_iter_dentry_kern) >
+                    sizeof(struct bpf_iter_dentry));
+       BUILD_BUG_ON(__alignof__(struct bpf_iter_dentry_kern) !=
+                    __alignof__(struct bpf_iter_dentry));
+
+       if (flags)
+               return -EINVAL;
+
+       switch (flags) {
+       case BPF_DENTRY_ITER_TO_ROOT:
+       case BPF_DENTRY_ITER_TO_MNT_ROOT:
+               break;
+       default:
+               return -EINVAL;
+       }
+       kit->path = *path;
+       path_get(&kit->path);
+       kit->flags = flags;
+       return 0;
+}
+
+__bpf_kfunc struct dentry *bpf_iter_dentry_next(struct bpf_iter_dentry *it)
+{
+       struct bpf_iter_dentry_kern *kit = (void*)it;
+       struct dentry *parent_dentry;
+
+       if (unlikely(IS_ROOT(kit->path.dentry)))
+               return NULL;
+
+jump_up:
+       if (kit->path.dentry == kit->path.mnt->mnt_root) {
+               if (kit->flags == BPF_DENTRY_ITER_TO_MNT_ROOT)
+                       return NULL;
+               if (follow_up(&kit->path)) {
+                       goto jump_up;
+               }
+       }
+       parent_dentry = dget_parent(kit->path.dentry);
+       dput(kit->path.dentry);
+       kit->path.dentry = parent_dentry;
+       return parent_dentry;
+}
+
+__bpf_kfunc void bpf_iter_dentry_destroy(struct bpf_iter_dentry *it)
+{
+       struct bpf_iter_dentry_kern *kit = (void*)it;
+
+       path_put(&kit->path);
+}
+
+__bpf_kfunc_end_defs();
+
 BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
 BTF_ID_FLAGS(func, bpf_get_task_exe_file,
             KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE)    /* Will fix this later */
 BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_dentry_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_dentry_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_dentry_destroy, KF_ITER_DESTROY)
 BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)

 static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
Al Viro Aug. 20, 2024, 6:29 a.m. UTC | #14
On Mon, Aug 19, 2024 at 08:25:38PM +0000, Song Liu wrote:

> int bpf_get_parent_path(struct path *p) {
> again:
>     if (p->dentry == p->mnt.mnt_root) {
>         follow_up(p);
>         goto again;
>     }
>     if (unlikely(IS_ROOT(p->dentry))) {
>         return PARENT_WALK_DONE;  
>     }
>     parent_dentry = dget_parent(p->dentry);
>     dput(p->dentry);
>     p->dentry = parent_dentry;
>     return PARENT_WALK_NEXT; 
> }
> 
> This will handle the mount. However, we cannot guarantee deny-by-default
> policies like LandLock does, because this is just a building block of 
> some security policies. 

You do realize that above is racy as hell, right?

Filesystem objects do get moved around.  You can, theoretically, play with
rename_lock, but that is highly antisocial.

What's more, _mounts_ can get moved around.  That is to say, there is no
such thing as stable canonical pathname of a file.
Song Liu Aug. 20, 2024, 7:23 a.m. UTC | #15
> On Aug 19, 2024, at 11:29 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> 
> On Mon, Aug 19, 2024 at 08:25:38PM +0000, Song Liu wrote:
> 
>> int bpf_get_parent_path(struct path *p) {
>> again:
>>    if (p->dentry == p->mnt.mnt_root) {
>>        follow_up(p);
>>        goto again;
>>    }
>>    if (unlikely(IS_ROOT(p->dentry))) {
>>        return PARENT_WALK_DONE;  
>>    }
>>    parent_dentry = dget_parent(p->dentry);
>>    dput(p->dentry);
>>    p->dentry = parent_dentry;
>>    return PARENT_WALK_NEXT; 
>> }
>> 
>> This will handle the mount. However, we cannot guarantee deny-by-default
>> policies like LandLock does, because this is just a building block of 
>> some security policies.
> 
> You do realize that above is racy as hell, right?
> 
> Filesystem objects do get moved around.  You can, theoretically, play with
> rename_lock, but that is highly antisocial.

I do understand filesystem objects may get moved around. However, I am not
sure whether we have to avoid all the race conditions (and whether it is
really possible to avoid all race conditions). 

> What's more, _mounts_ can get moved around.  That is to say, there is no
> such thing as stable canonical pathname of a file.

Maybe I should really step back and ask for high level suggestions. 

We are hoping to tag all files in a directory with xattr (or something
else) on the directory. For example, a xattr "Do_not_rename" on /usr 
should block rename of all files inside /usr. 

Our original idea is to start from security_file_open() hook, and walk 
up the tree (/usr/bin/gcc => /usr/bin => /usr). However, this appears
to be wasteful and unreliable, and Christian suggested we should use a 
combination of security_inode_permission and security_file_open. I 
tried to build something on this direction, and hits a few issues:

1. Getting xattr from security_inode_permission() is not easy. Some
   FS requires dentry to get xattr. 
2. Finding parent from security_inode_permission() is also tricky.
   (maybe as trick as doing dget_parent() from security_file_open?)
   We need tag on /usr to work on /usr/bin. But how do we know /usr
   is /usr/bin's parent?

For the original goal… tag all files in a directory with xattr on 
the directory, is it possible at all? If not, we will go back and
implement something to tag all the files one at a time. If it is
indeed possible, what's the right way to do it, and what are the
race conditions we need to avoid and to accept?

Comments and suggestions are highly appreciated. 

Thanks,
Song
Mickaël Salaün Aug. 20, 2024, 12:45 p.m. UTC | #16
On Mon, Aug 19, 2024 at 08:35:53PM +0000, Song Liu wrote:
> Hi Mickaël, 
> 
> > On Aug 19, 2024, at 6:12 AM, Mickaël Salaün <mic@digikod.net> wrote:
> 
> [...]
> 
> >> But because landlock works with a deny-by-default security policy this
> >> is ok and it takes overmounts into account etc.
> > 
> > Correct. Another point is that Landlock uses the file's path (i.e.
> > dentry + mnt) to walk down to the parent.  Only using the dentry would
> > be incorrect for most use cases (i.e. any system with more than one
> > mount point).
> 
> Thanks for highlighting the difference. Let me see whether we can bridge
> the gap for this set. 
> 
> [...]
> 
> >>> 
> >>> 1. Change security_inode_permission to take dentry instead of inode.
> >> 
> >> Sorry, no.
> >> 
> >>> 2. Still add bpf_dget_parent. We will use it with security_inode_permission
> >>>   so that we can propagate flags from parents to children. We will need
> >>>   a bpf_dput as well. 
> >>> 3. There are pros and cons with different approaches to implement this
> >>>   policy (tags on directory work for all files in it). We probably need 
> >>>   the policy writer to decide with one to use. From BPF's POV, dget_parent
> >>>   is "safe", because it won't crash the system. It may encourage some bad
> >>>   patterns, but it appears to be required in some use cases.
> >> 
> >> You cannot just walk a path upwards and check permissions and assume
> >> that this is safe unless you have a clear idea what makes it safe in
> >> this scenario. Landlock has afaict. But so far you only have a vague
> >> sketch of checking permissions walking upwards and retrieving xattrs
> >> without any notion of the problems involved.
> > 
> > Something to keep in mind is that relying on xattr to label files
> > requires to deny sanboxed processes to change this xattr, otherwise it
> > would be trivial to bypass such a sandbox.  Sandboxing must be though as
> > a whole and Landlock's design for file system access control takes into
> > account all kind of file system operations that could bypass a sandbox
> > policy (e.g. mount operations), and also protects from impersonations.
> 
> Thanks for sharing these experiences! 
> 
> > What is the use case for this patch series?  Couldn't Landlock be used
> > for that?
> 
> We have multiple use cases. We can use Landlock for some of them. The 
> primary goal of this patchset is to add useful building blocks to BPF LSM
> so that we can build effective and flexible security policies for various
> use cases. These building blocks alone won't be very useful. For example,
> as you pointed out, to make xattr labels useful, we need some policies 
> for xattr read/write.
> 
> Does this make sense?

Yes, but I think you'll end up with a code pretty close to the Landlock
implementation.

What about adding BPF hooks to Landlock?  User space could create
Landlock sandboxes that would delegate the denials to a BPF program,
which could then also allow such access, but without directly handling
nor reimplementing filesystem path walks.  The Landlock user space ABI
changes would mainly be a new landlock_ruleset_attr field to explicitly
ask for a (system-wide) BPF program to handle access requests if no
Landlock rule allow them.  We could also tie a BPF data (i.e. blob) to
Landlock domains for consistent sandbox management.  One of the
advantage of this approach is to only run related BPF programs if the
sandbox policy would deny the request.  Another advantage would be to
leverage the Landlock user space interface to let any program partially
define and extend their security policy.

I'm working on implementing audit support for Landlock [1] and I think
these changes could be useful to implement BPF hooks to run a dedicated
BPF program type per event (see landlock_log_denial() and struct
landlock_request).  I'll get back on this patch series in September.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git/log/?h=wip-audit
Song Liu Aug. 20, 2024, 5:42 p.m. UTC | #17
> On Aug 20, 2024, at 5:45 AM, Mickaël Salaün <mic@digikod.net> wrote:
> 
> On Mon, Aug 19, 2024 at 08:35:53PM +0000, Song Liu wrote:
>> Hi Mickaël, 
>> 
>>> On Aug 19, 2024, at 6:12 AM, Mickaël Salaün <mic@digikod.net> wrote:
>> 
>> [...]
>> 
>>>> But because landlock works with a deny-by-default security policy this
>>>> is ok and it takes overmounts into account etc.
>>> 
>>> Correct. Another point is that Landlock uses the file's path (i.e.
>>> dentry + mnt) to walk down to the parent.  Only using the dentry would
>>> be incorrect for most use cases (i.e. any system with more than one
>>> mount point).
>> 
>> Thanks for highlighting the difference. Let me see whether we can bridge
>> the gap for this set. 
>> 
>> [...]
>> 
>>>>> 
>>>>> 1. Change security_inode_permission to take dentry instead of inode.
>>>> 
>>>> Sorry, no.
>>>> 
>>>>> 2. Still add bpf_dget_parent. We will use it with security_inode_permission
>>>>>  so that we can propagate flags from parents to children. We will need
>>>>>  a bpf_dput as well. 
>>>>> 3. There are pros and cons with different approaches to implement this
>>>>>  policy (tags on directory work for all files in it). We probably need 
>>>>>  the policy writer to decide with one to use. From BPF's POV, dget_parent
>>>>>  is "safe", because it won't crash the system. It may encourage some bad
>>>>>  patterns, but it appears to be required in some use cases.
>>>> 
>>>> You cannot just walk a path upwards and check permissions and assume
>>>> that this is safe unless you have a clear idea what makes it safe in
>>>> this scenario. Landlock has afaict. But so far you only have a vague
>>>> sketch of checking permissions walking upwards and retrieving xattrs
>>>> without any notion of the problems involved.
>>> 
>>> Something to keep in mind is that relying on xattr to label files
>>> requires to deny sanboxed processes to change this xattr, otherwise it
>>> would be trivial to bypass such a sandbox.  Sandboxing must be though as
>>> a whole and Landlock's design for file system access control takes into
>>> account all kind of file system operations that could bypass a sandbox
>>> policy (e.g. mount operations), and also protects from impersonations.
>> 
>> Thanks for sharing these experiences! 
>> 
>>> What is the use case for this patch series?  Couldn't Landlock be used
>>> for that?
>> 
>> We have multiple use cases. We can use Landlock for some of them. The 
>> primary goal of this patchset is to add useful building blocks to BPF LSM
>> so that we can build effective and flexible security policies for various
>> use cases. These building blocks alone won't be very useful. For example,
>> as you pointed out, to make xattr labels useful, we need some policies 
>> for xattr read/write.
>> 
>> Does this make sense?
> 
> Yes, but I think you'll end up with a code pretty close to the Landlock
> implementation.

At the moment, I think it is not possible to do full Landlock logic in
BPF. We are learning from other LSMs. 

> What about adding BPF hooks to Landlock?  User space could create
> Landlock sandboxes that would delegate the denials to a BPF program,
> which could then also allow such access, but without directly handling
> nor reimplementing filesystem path walks.  The Landlock user space ABI
> changes would mainly be a new landlock_ruleset_attr field to explicitly
> ask for a (system-wide) BPF program to handle access requests if no
> Landlock rule allow them.  We could also tie a BPF data (i.e. blob) to
> Landlock domains for consistent sandbox management.  One of the
> advantage of this approach is to only run related BPF programs if the
> sandbox policy would deny the request.  Another advantage would be to
> leverage the Landlock user space interface to let any program partially
> define and extend their security policy.

Given there is BPF LSM, I have never thought about adding BPF hooks to 
Landlock or other LSMs. I personally would prefer to have a common API
to walk the path, maybe something like vma_iterator. But I need to read
more code to understand whether this makes sense?

Thanks,
Song

> I'm working on implementing audit support for Landlock [1] and I think
> these changes could be useful to implement BPF hooks to run a dedicated
> BPF program type per event (see landlock_log_denial() and struct
> landlock_request).  I'll get back on this patch series in September.
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git/log/?h=wip-audit
Paul Moore Aug. 20, 2024, 9:11 p.m. UTC | #18
On Tue, Aug 20, 2024 at 1:43 PM Song Liu <songliubraving@meta.com> wrote:
> > On Aug 20, 2024, at 5:45 AM, Mickaël Salaün <mic@digikod.net> wrote:

...

> > What about adding BPF hooks to Landlock?  User space could create
> > Landlock sandboxes that would delegate the denials to a BPF program,
> > which could then also allow such access, but without directly handling
> > nor reimplementing filesystem path walks.  The Landlock user space ABI
> > changes would mainly be a new landlock_ruleset_attr field to explicitly
> > ask for a (system-wide) BPF program to handle access requests if no
> > Landlock rule allow them.  We could also tie a BPF data (i.e. blob) to
> > Landlock domains for consistent sandbox management.  One of the
> > advantage of this approach is to only run related BPF programs if the
> > sandbox policy would deny the request.  Another advantage would be to
> > leverage the Landlock user space interface to let any program partially
> > define and extend their security policy.
>
> Given there is BPF LSM, I have never thought about adding BPF hooks to
> Landlock or other LSMs. I personally would prefer to have a common API
> to walk the path, maybe something like vma_iterator. But I need to read
> more code to understand whether this makes sense?

Just so there isn't any confusion, I want to make sure that everyone
is clear that "adding BPF hooks to Landlock" should mean "add a new
Landlock specific BPF hook inside Landlock" and not "reuse existing
BPF LSM hooks inside Landlock".
Song Liu Aug. 21, 2024, 3:43 a.m. UTC | #19
> On Aug 20, 2024, at 2:11 PM, Paul Moore <paul@paul-moore.com> wrote:
> 
> On Tue, Aug 20, 2024 at 1:43 PM Song Liu <songliubraving@meta.com> wrote:
>>> On Aug 20, 2024, at 5:45 AM, Mickaël Salaün <mic@digikod.net> wrote:
> 
> ...
> 
>>> What about adding BPF hooks to Landlock?  User space could create
>>> Landlock sandboxes that would delegate the denials to a BPF program,
>>> which could then also allow such access, but without directly handling
>>> nor reimplementing filesystem path walks.  The Landlock user space ABI
>>> changes would mainly be a new landlock_ruleset_attr field to explicitly
>>> ask for a (system-wide) BPF program to handle access requests if no
>>> Landlock rule allow them.  We could also tie a BPF data (i.e. blob) to
>>> Landlock domains for consistent sandbox management.  One of the
>>> advantage of this approach is to only run related BPF programs if the
>>> sandbox policy would deny the request.  Another advantage would be to
>>> leverage the Landlock user space interface to let any program partially
>>> define and extend their security policy.
>> 
>> Given there is BPF LSM, I have never thought about adding BPF hooks to
>> Landlock or other LSMs. I personally would prefer to have a common API
>> to walk the path, maybe something like vma_iterator. But I need to read
>> more code to understand whether this makes sense?
> 
> Just so there isn't any confusion, I want to make sure that everyone
> is clear that "adding BPF hooks to Landlock" should mean "add a new
> Landlock specific BPF hook inside Landlock" and not "reuse existing
> BPF LSM hooks inside Landlock".

I think we are on the same page. My understanding of Mickaël's idea is
to add some brand new hooks to Landlock code, so that Landlock can
use BPF program to make some decisions. 

Thanks,
Song
Mickaël Salaün Aug. 23, 2024, 10:38 a.m. UTC | #20
On Wed, Aug 21, 2024 at 03:43:48AM +0000, Song Liu wrote:
> 
> 
> > On Aug 20, 2024, at 2:11 PM, Paul Moore <paul@paul-moore.com> wrote:
> > 
> > On Tue, Aug 20, 2024 at 1:43 PM Song Liu <songliubraving@meta.com> wrote:
> >>> On Aug 20, 2024, at 5:45 AM, Mickaël Salaün <mic@digikod.net> wrote:
> > 
> > ...
> > 
> >>> What about adding BPF hooks to Landlock?  User space could create
> >>> Landlock sandboxes that would delegate the denials to a BPF program,
> >>> which could then also allow such access, but without directly handling
> >>> nor reimplementing filesystem path walks.  The Landlock user space ABI
> >>> changes would mainly be a new landlock_ruleset_attr field to explicitly
> >>> ask for a (system-wide) BPF program to handle access requests if no
> >>> Landlock rule allow them.  We could also tie a BPF data (i.e. blob) to
> >>> Landlock domains for consistent sandbox management.  One of the
> >>> advantage of this approach is to only run related BPF programs if the
> >>> sandbox policy would deny the request.  Another advantage would be to
> >>> leverage the Landlock user space interface to let any program partially
> >>> define and extend their security policy.
> >> 
> >> Given there is BPF LSM, I have never thought about adding BPF hooks to
> >> Landlock or other LSMs. I personally would prefer to have a common API
> >> to walk the path, maybe something like vma_iterator. But I need to read
> >> more code to understand whether this makes sense?

I think it would not be an issue to use BPF Landlock hooks along with
BPF LSM hooks for the same global policy.  This could also use the
Landlock domain concept for your use case, including domain inheritance,
domain identification, cross-domain protections... to avoid
reimplementing the same semantic (and going through the same issues).
Limiting the BPF program calls could also improve performance.

> > 
> > Just so there isn't any confusion, I want to make sure that everyone
> > is clear that "adding BPF hooks to Landlock" should mean "add a new
> > Landlock specific BPF hook inside Landlock" and not "reuse existing
> > BPF LSM hooks inside Landlock".
> 
> I think we are on the same page. My understanding of Mickaël's idea is
> to add some brand new hooks to Landlock code, so that Landlock can
> use BPF program to make some decisions. 

Correct
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
index 37056ba73847..a960cfbe8907 100644
--- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -2,17 +2,19 @@ 
 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
 
 #include <stdlib.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/xattr.h>
 #include <linux/fsverity.h>
 #include <unistd.h>
 #include <test_progs.h>
 #include "test_get_xattr.skel.h"
+#include "test_dentry_xattr.skel.h"
 #include "test_fsverity.skel.h"
 
 static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
 
-static void test_xattr(void)
+static void test_file_xattr(void)
 {
 	struct test_get_xattr *skel = NULL;
 	int fd = -1, err;
@@ -50,7 +52,8 @@  static void test_xattr(void)
 	if (!ASSERT_GE(fd, 0, "open_file"))
 		goto out;
 
-	ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr");
+	ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+	ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
 
 out:
 	close(fd);
@@ -58,6 +61,53 @@  static void test_xattr(void)
 	remove(testfile);
 }
 
+static void test_directory_xattr(void)
+{
+	struct test_dentry_xattr *skel = NULL;
+	static const char * const paths[] = {
+		"/tmp/a",
+		"/tmp/a/b",
+		"/tmp/a/b/c",
+	};
+	const char *file = "/tmp/a/b/c/d";
+	int i, j, err, fd;
+
+	for (i = 0; i < sizeof(paths) / sizeof(char *); i++) {
+		err = mkdir(paths[i], 0755);
+		if (!ASSERT_OK(err, "mkdir"))
+			goto out;
+		err = setxattr(paths[i], "user.kfunc", "hello", sizeof("hello"), 0);
+		if (!ASSERT_OK(err, "setxattr")) {
+			i++;
+			goto out;
+		}
+	}
+
+	skel = test_dentry_xattr__open_and_load();
+
+	if (!ASSERT_OK_PTR(skel, "test_dentry_xattr__open_and_load"))
+		goto out;
+
+	skel->bss->monitored_pid = getpid();
+	err = test_dentry_xattr__attach(skel);
+
+	if (!ASSERT_OK(err, "test_dentry__xattr__attach"))
+		goto out;
+
+	fd = open(file, O_CREAT | O_RDONLY, 0644);
+	if (!ASSERT_GE(fd, 0, "open_file"))
+		goto out;
+
+	ASSERT_EQ(skel->bss->number_of_xattr_found, 3, "number_of_xattr_found");
+	close(fd);
+out:
+	test_dentry_xattr__destroy(skel);
+	remove(file);
+	for (j = i - 1; j >= 0; j--)
+		rmdir(paths[j]);
+}
+
+
 #ifndef SHA256_DIGEST_SIZE
 #define SHA256_DIGEST_SIZE      32
 #endif
@@ -134,8 +184,11 @@  static void test_fsverity(void)
 
 void test_fs_kfuncs(void)
 {
-	if (test__start_subtest("xattr"))
-		test_xattr();
+	if (test__start_subtest("file_xattr"))
+		test_file_xattr();
+
+	if (test__start_subtest("dentry_xattr"))
+		test_directory_xattr();
 
 	if (test__start_subtest("fsverity"))
 		test_fsverity();
diff --git a/tools/testing/selftests/bpf/progs/test_dentry_xattr.c b/tools/testing/selftests/bpf/progs/test_dentry_xattr.c
new file mode 100644
index 000000000000..d2e378b2e2d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_dentry_xattr.c
@@ -0,0 +1,46 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 number_of_xattr_found;
+
+static const char expected_value[] = "hello";
+char value[32];
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+	struct bpf_dynptr value_ptr;
+	struct dentry *dentry, *prev_dentry;
+	__u32 pid, matches = 0;
+	int i, ret;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	if (pid != monitored_pid)
+		return 0;
+
+	bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+
+	dentry = bpf_file_dentry(f);
+
+	for (i = 0; i < 10; i++) {
+		ret = bpf_get_dentry_xattr(dentry, "user.kfunc", &value_ptr);
+		if (ret == sizeof(expected_value) &&
+		    !bpf_strncmp(value, ret, expected_value))
+			matches++;
+
+		prev_dentry = dentry;
+		dentry = bpf_dget_parent(prev_dentry);
+		bpf_dput(prev_dentry);
+	}
+
+	bpf_dput(dentry);
+	number_of_xattr_found = matches;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
index 7eb2a4e5a3e5..3b0dc6106ca5 100644
--- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -9,7 +9,8 @@ 
 char _license[] SEC("license") = "GPL";
 
 __u32 monitored_pid;
-__u32 found_xattr;
+__u32 found_xattr_from_file;
+__u32 found_xattr_from_dentry;
 
 static const char expected_value[] = "hello";
 char value[32];
@@ -18,6 +19,7 @@  SEC("lsm.s/file_open")
 int BPF_PROG(test_file_open, struct file *f)
 {
 	struct bpf_dynptr value_ptr;
+	struct dentry *dentry;
 	__u32 pid;
 	int ret;
 
@@ -32,6 +34,16 @@  int BPF_PROG(test_file_open, struct file *f)
 		return 0;
 	if (bpf_strncmp(value, ret, expected_value))
 		return 0;
-	found_xattr = 1;
+	found_xattr_from_file = 1;
+
+	dentry = bpf_file_dentry(f);
+	ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
+	bpf_dput(dentry);
+	if (ret != sizeof(expected_value))
+		return 0;
+	if (bpf_strncmp(value, ret, expected_value))
+		return 0;
+	found_xattr_from_dentry = 1;
+
 	return 0;
 }