diff mbox series

[v3,bpf-next,1/3] bpf: introduce new VFS based BPF kfuncs

Message ID 20240726085604.2369469-2-mattbobrowski@google.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series introduce new VFS based BPF kfuncs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 661 this patch: 661
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 2 maintainers not CCed: viro@zeniv.linux.org.uk jack@suse.cz
netdev/build_clang success Errors and warnings before: 663 this patch: 663
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 667 this patch: 670
netdev/checkpatch fail CHECK: Please use a blank line after function/struct/union/enum declarations ERROR: code indent should use tabs where possible WARNING: Block comments should align the * on each line WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Matt Bobrowski July 26, 2024, 8:56 a.m. UTC
Add a new variant of bpf_d_path() named bpf_path_d_path() which takes
the form of a BPF kfunc and enforces KF_TRUSTED_ARGS semantics onto
its arguments.

This new d_path() based BPF kfunc variant is intended to address the
legacy bpf_d_path() BPF helper's susceptibility to memory corruption
issues [0, 1, 2] by ensuring to only operate on supplied arguments
which are deemed trusted by the BPF verifier. Typically, this means
that only pointers to a struct path which have been referenced counted
may be supplied.

In addition to the new bpf_path_d_path() BPF kfunc, we also add a
KF_ACQUIRE based BPF kfunc bpf_get_task_exe_file() and KF_RELEASE
counterpart BPF kfunc bpf_put_file(). This is so that the new
bpf_path_d_path() BPF kfunc can be used more flexibility from within
the context of a BPF LSM program. It's rather common to ascertain the
backing executable file for the calling process by performing the
following walk current->mm->exe_file while instrumenting a given
operation from the context of the BPF LSM program. However, walking
current->mm->exe_file directly is never deemed to be OK, and doing so
from both inside and outside of BPF LSM program context should be
considered as a bug. Using bpf_get_task_exe_file() and in turn
bpf_put_file() will allow BPF LSM programs to reliably get and put
references to current->mm->exe_file.

As of now, all the newly introduced BPF kfuncs within this patch are
limited to sleepable BPF LSM program types. Therefore, they may only
be called when a BPF LSM program is attached to one of the listed
attachment points defined within the sleepable_lsm_hooks BTF ID set.

[0] https://lore.kernel.org/bpf/CAG48ez0ppjcT=QxU-jtCUfb5xQb3mLr=5FcwddF_VKfEBPs_Dg@mail.gmail.com/
[1] https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org/
[2] https://lore.kernel.org/bpf/20220219113744.1852259-1-memxor@gmail.com/

Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
---
 fs/Makefile        |   1 +
 fs/bpf_fs_kfuncs.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 fs/bpf_fs_kfuncs.c

Comments

Christian Brauner July 26, 2024, 1:18 p.m. UTC | #1
On Fri, Jul 26, 2024 at 08:56:02AM GMT, Matt Bobrowski wrote:
> Add a new variant of bpf_d_path() named bpf_path_d_path() which takes
> the form of a BPF kfunc and enforces KF_TRUSTED_ARGS semantics onto
> its arguments.
> 
> This new d_path() based BPF kfunc variant is intended to address the
> legacy bpf_d_path() BPF helper's susceptibility to memory corruption
> issues [0, 1, 2] by ensuring to only operate on supplied arguments
> which are deemed trusted by the BPF verifier. Typically, this means
> that only pointers to a struct path which have been referenced counted
> may be supplied.
> 
> In addition to the new bpf_path_d_path() BPF kfunc, we also add a
> KF_ACQUIRE based BPF kfunc bpf_get_task_exe_file() and KF_RELEASE
> counterpart BPF kfunc bpf_put_file(). This is so that the new
> bpf_path_d_path() BPF kfunc can be used more flexibility from within
> the context of a BPF LSM program. It's rather common to ascertain the
> backing executable file for the calling process by performing the
> following walk current->mm->exe_file while instrumenting a given
> operation from the context of the BPF LSM program. However, walking
> current->mm->exe_file directly is never deemed to be OK, and doing so
> from both inside and outside of BPF LSM program context should be
> considered as a bug. Using bpf_get_task_exe_file() and in turn
> bpf_put_file() will allow BPF LSM programs to reliably get and put
> references to current->mm->exe_file.
> 
> As of now, all the newly introduced BPF kfuncs within this patch are
> limited to sleepable BPF LSM program types. Therefore, they may only
> be called when a BPF LSM program is attached to one of the listed
> attachment points defined within the sleepable_lsm_hooks BTF ID set.
> 
> [0] https://lore.kernel.org/bpf/CAG48ez0ppjcT=QxU-jtCUfb5xQb3mLr=5FcwddF_VKfEBPs_Dg@mail.gmail.com/
> [1] https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org/
> [2] https://lore.kernel.org/bpf/20220219113744.1852259-1-memxor@gmail.com/
> 
> Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
> ---
>  fs/Makefile        |   1 +
>  fs/bpf_fs_kfuncs.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 134 insertions(+)
>  create mode 100644 fs/bpf_fs_kfuncs.c
> 
> diff --git a/fs/Makefile b/fs/Makefile
> index 6ecc9b0a53f2..61679fd587b7 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
>  obj-$(CONFIG_EROFS_FS)		+= erofs/
>  obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
>  obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
> +obj-$(CONFIG_BPF_LSM)		+= bpf_fs_kfuncs.o
> diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
> new file mode 100644
> index 000000000000..3813e2a83313
> --- /dev/null
> +++ b/fs/bpf_fs_kfuncs.c
> @@ -0,0 +1,133 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Google LLC. */
> +
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <linux/btf_ids.h>
> +#include <linux/dcache.h>
> +#include <linux/err.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/init.h>
> +#include <linux/mm.h>
> +#include <linux/path.h>
> +#include <linux/sched.h>
> +
> +__bpf_kfunc_start_defs();
> +/**
> + * bpf_get_task_exe_file - get a reference on the exe_file struct file member of
> + *                         the mm_struct that is nested within the supplied
> + *                         task_struct
> + * @task: task_struct of which the nested mm_struct exe_file member to get a
> + * reference on
> + *
> + * Get a reference on the exe_file struct file member field of the mm_struct
> + * nested within the supplied *task*. The referenced file pointer acquired by
> + * this BPF kfunc must be released using bpf_put_file(). Failing to call
> + * bpf_put_file() on the returned referenced struct file pointer that has been
> + * acquired by this BPF kfunc will result in the BPF program being rejected by
> + * the BPF verifier.
> + *
> + * This BPF kfunc may only be called from sleepable BPF LSM programs.
> + *
> + * Internally, this BPF kfunc leans on get_task_exe_file(), such that calling
> + * bpf_get_task_exe_file() would be analogous to calling get_task_exe_file()
> + * directly in kernel context.
> + *
> + * Return: A referenced struct file pointer to the exe_file member of the
> + * mm_struct that is nested within the supplied *task*. On error, NULL is
> + * returned.
> + */
> +__bpf_kfunc struct file *bpf_get_task_exe_file(struct task_struct *task)
> +{
> +	return get_task_exe_file(task);
> +}
> +
> +/**
> + * bpf_put_file - put a reference on the supplied file
> + * @file: file to put a reference on
> + *
> + * Put a reference on the supplied *file*. Only referenced file pointers may be
> + * passed to this BPF kfunc. Attempting to pass an unreferenced file pointer, or
> + * any other arbitrary pointer for that matter, will result in the BPF program
> + * being rejected by the BPF verifier.
> + *
> + * This BPF kfunc may only be called from sleepable BPF LSM programs. Though
> + * fput() can be called from IRQ context, we're enforcing sleepability here.
> + */
> +__bpf_kfunc void bpf_put_file(struct file *file)
> +{
> +	fput(file);
> +}
> +
> +/**
> + * bpf_path_d_path - resolve the pathname for the supplied path
> + * @path: path to resolve the pathname for
> + * @buf: buffer to return the resolved pathname in
> + * @buf__sz: length of the supplied buffer
> + *
> + * Resolve the pathname for the supplied *path* and store it in *buf*. This BPF
> + * kfunc is the safer variant of the legacy bpf_d_path() helper and should be
> + * used in place of bpf_d_path() whenever possible. It enforces KF_TRUSTED_ARGS
> + * semantics, meaning that the supplied *path* must itself hold a valid
> + * reference, or else the BPF program will be outright rejected by the BPF
> + * verifier.
> + *
> + * This BPF kfunc may only be called from sleepable BPF LSM programs.
> + *
> + * Return: A positive integer corresponding to the length of the resolved
> + * pathname in *buf*, including the NUL termination character. On error, a
> + * negative integer is returned.
> + */
> +__bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
> +{
> +	int len;
> +	char *ret;
> +
> +	if (buf__sz <= 0)
> +		return -EINVAL;

size_t is unsigned so this should just be !buf__sz I can fix that
though. The __sz thing has meaning to the verifier afaict so I guess
that's fine as name then.
Matt Bobrowski July 26, 2024, 8:31 p.m. UTC | #2
On Fri, Jul 26, 2024 at 03:18:25PM +0200, Christian Brauner wrote:
> On Fri, Jul 26, 2024 at 08:56:02AM GMT, Matt Bobrowski wrote:
> > Add a new variant of bpf_d_path() named bpf_path_d_path() which takes
> > the form of a BPF kfunc and enforces KF_TRUSTED_ARGS semantics onto
> > its arguments.
> > 
> > This new d_path() based BPF kfunc variant is intended to address the
> > legacy bpf_d_path() BPF helper's susceptibility to memory corruption
> > issues [0, 1, 2] by ensuring to only operate on supplied arguments
> > which are deemed trusted by the BPF verifier. Typically, this means
> > that only pointers to a struct path which have been referenced counted
> > may be supplied.
> > 
> > In addition to the new bpf_path_d_path() BPF kfunc, we also add a
> > KF_ACQUIRE based BPF kfunc bpf_get_task_exe_file() and KF_RELEASE
> > counterpart BPF kfunc bpf_put_file(). This is so that the new
> > bpf_path_d_path() BPF kfunc can be used more flexibility from within
> > the context of a BPF LSM program. It's rather common to ascertain the
> > backing executable file for the calling process by performing the
> > following walk current->mm->exe_file while instrumenting a given
> > operation from the context of the BPF LSM program. However, walking
> > current->mm->exe_file directly is never deemed to be OK, and doing so
> > from both inside and outside of BPF LSM program context should be
> > considered as a bug. Using bpf_get_task_exe_file() and in turn
> > bpf_put_file() will allow BPF LSM programs to reliably get and put
> > references to current->mm->exe_file.
> > 
> > As of now, all the newly introduced BPF kfuncs within this patch are
> > limited to sleepable BPF LSM program types. Therefore, they may only
> > be called when a BPF LSM program is attached to one of the listed
> > attachment points defined within the sleepable_lsm_hooks BTF ID set.
> > 
> > [0] https://lore.kernel.org/bpf/CAG48ez0ppjcT=QxU-jtCUfb5xQb3mLr=5FcwddF_VKfEBPs_Dg@mail.gmail.com/
> > [1] https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org/
> > [2] https://lore.kernel.org/bpf/20220219113744.1852259-1-memxor@gmail.com/
> > 
> > Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
> > ---
> >  fs/Makefile        |   1 +
> >  fs/bpf_fs_kfuncs.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 134 insertions(+)
> >  create mode 100644 fs/bpf_fs_kfuncs.c
> > 
> > diff --git a/fs/Makefile b/fs/Makefile
> > index 6ecc9b0a53f2..61679fd587b7 100644
> > --- a/fs/Makefile
> > +++ b/fs/Makefile
> > @@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
> >  obj-$(CONFIG_EROFS_FS)		+= erofs/
> >  obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
> >  obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
> > +obj-$(CONFIG_BPF_LSM)		+= bpf_fs_kfuncs.o
> > diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
> > new file mode 100644
> > index 000000000000..3813e2a83313
> > --- /dev/null
> > +++ b/fs/bpf_fs_kfuncs.c
> > @@ -0,0 +1,133 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2024 Google LLC. */
> > +
> > +#include <linux/bpf.h>
> > +#include <linux/btf.h>
> > +#include <linux/btf_ids.h>
> > +#include <linux/dcache.h>
> > +#include <linux/err.h>
> > +#include <linux/fs.h>
> > +#include <linux/file.h>
> > +#include <linux/init.h>
> > +#include <linux/mm.h>
> > +#include <linux/path.h>
> > +#include <linux/sched.h>
> > +
> > +__bpf_kfunc_start_defs();
> > +/**
> > + * bpf_get_task_exe_file - get a reference on the exe_file struct file member of
> > + *                         the mm_struct that is nested within the supplied
> > + *                         task_struct
> > + * @task: task_struct of which the nested mm_struct exe_file member to get a
> > + * reference on
> > + *
> > + * Get a reference on the exe_file struct file member field of the mm_struct
> > + * nested within the supplied *task*. The referenced file pointer acquired by
> > + * this BPF kfunc must be released using bpf_put_file(). Failing to call
> > + * bpf_put_file() on the returned referenced struct file pointer that has been
> > + * acquired by this BPF kfunc will result in the BPF program being rejected by
> > + * the BPF verifier.
> > + *
> > + * This BPF kfunc may only be called from sleepable BPF LSM programs.
> > + *
> > + * Internally, this BPF kfunc leans on get_task_exe_file(), such that calling
> > + * bpf_get_task_exe_file() would be analogous to calling get_task_exe_file()
> > + * directly in kernel context.
> > + *
> > + * Return: A referenced struct file pointer to the exe_file member of the
> > + * mm_struct that is nested within the supplied *task*. On error, NULL is
> > + * returned.
> > + */
> > +__bpf_kfunc struct file *bpf_get_task_exe_file(struct task_struct *task)
> > +{
> > +	return get_task_exe_file(task);
> > +}
> > +
> > +/**
> > + * bpf_put_file - put a reference on the supplied file
> > + * @file: file to put a reference on
> > + *
> > + * Put a reference on the supplied *file*. Only referenced file pointers may be
> > + * passed to this BPF kfunc. Attempting to pass an unreferenced file pointer, or
> > + * any other arbitrary pointer for that matter, will result in the BPF program
> > + * being rejected by the BPF verifier.
> > + *
> > + * This BPF kfunc may only be called from sleepable BPF LSM programs. Though
> > + * fput() can be called from IRQ context, we're enforcing sleepability here.
> > + */
> > +__bpf_kfunc void bpf_put_file(struct file *file)
> > +{
> > +	fput(file);
> > +}
> > +
> > +/**
> > + * bpf_path_d_path - resolve the pathname for the supplied path
> > + * @path: path to resolve the pathname for
> > + * @buf: buffer to return the resolved pathname in
> > + * @buf__sz: length of the supplied buffer
> > + *
> > + * Resolve the pathname for the supplied *path* and store it in *buf*. This BPF
> > + * kfunc is the safer variant of the legacy bpf_d_path() helper and should be
> > + * used in place of bpf_d_path() whenever possible. It enforces KF_TRUSTED_ARGS
> > + * semantics, meaning that the supplied *path* must itself hold a valid
> > + * reference, or else the BPF program will be outright rejected by the BPF
> > + * verifier.
> > + *
> > + * This BPF kfunc may only be called from sleepable BPF LSM programs.
> > + *
> > + * Return: A positive integer corresponding to the length of the resolved
> > + * pathname in *buf*, including the NUL termination character. On error, a
> > + * negative integer is returned.
> > + */
> > +__bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
> > +{
> > +	int len;
> > +	char *ret;
> > +
> > +	if (buf__sz <= 0)
> > +		return -EINVAL;
> 
> size_t is unsigned so this should just be !buf__sz I can fix that
> though.

Sure, that would be great if you wouldn't mind?

> The __sz thing has meaning to the verifier afaict so I guess that's
> fine as name then.

That's right, it's used to signal that a buffer and it's associated
size exists within the BPF kfuncs argument list. Using the __sz
annotation specifically allows the BPF verifier to deduce which size
argument is meant to be bounded to a given buffer.

/M
Alexei Starovoitov July 26, 2024, 8:43 p.m. UTC | #3
On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> +
> +static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> +       if (!btf_id_set8_contains(&bpf_fs_kfunc_set_ids, kfunc_id) ||
> +           prog->type == BPF_PROG_TYPE_LSM)
> +               return 0;
> +       return -EACCES;
> +}
> +
> +static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
> +       .owner = THIS_MODULE,
> +       .set = &bpf_fs_kfunc_set_ids,
> +       .filter = bpf_fs_kfuncs_filter,
> +};
> +
> +static int __init bpf_fs_kfuncs_init(void)
> +{
> +       return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
> +}

Aside from buf__sz <= 0 that Christian spotted
the bpf_fs_kfuncs_filter() is a watery water.
It's doing a redundant check that is already covered by

register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM,...

I'll remove it while applying.
Song Liu July 26, 2024, 9:25 p.m. UTC | #4
On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
>
[...]
> +       len = buf + buf__sz - ret;
> +       memmove(buf, ret, len);
> +       return len;
> +}
> +__bpf_kfunc_end_defs();
> +
> +BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> +BTF_ID_FLAGS(func, bpf_get_task_exe_file,
> +            KF_ACQUIRE | KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE | KF_SLEEPABLE)

Do we really need KF_SLEEPABLE for bpf_put_file?

Thanks,
Song

> +BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS | KF_SLEEPABLE)
> +BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
> +
[...]
Matt Bobrowski July 26, 2024, 9:49 p.m. UTC | #5
On Fri, Jul 26, 2024 at 02:25:26PM -0700, Song Liu wrote:
> On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> >
> [...]
> > +       len = buf + buf__sz - ret;
> > +       memmove(buf, ret, len);
> > +       return len;
> > +}
> > +__bpf_kfunc_end_defs();
> > +
> > +BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> > +BTF_ID_FLAGS(func, bpf_get_task_exe_file,
> > +            KF_ACQUIRE | KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_RET_NULL)
> > +BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE | KF_SLEEPABLE)
> 
> Do we really need KF_SLEEPABLE for bpf_put_file?

Well, the guts of fput() is annotated w/ might_sleep(), so the calling
thread may presumably be involuntarily put to sleep? You can also see
the guts of fput() invoking various indirect function calls
i.e. ->release(), and depending on the implementation of those, they
could be initiating resource release related actions which
consequently could result in waiting for some I/O to be done? fput()
also calls dput() and mntput() and these too can also do a bunch of
teardown.

Please correct me if I've misunderstood something.

/M
Song Liu July 26, 2024, 10:48 p.m. UTC | #6
On Fri, Jul 26, 2024 at 2:49 PM Matt Bobrowski <mattbobrowski@google.com> wrote:
>
> On Fri, Jul 26, 2024 at 02:25:26PM -0700, Song Liu wrote:
> > On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> > >
> > [...]
> > > +       len = buf + buf__sz - ret;
> > > +       memmove(buf, ret, len);
> > > +       return len;
> > > +}
> > > +__bpf_kfunc_end_defs();
> > > +
> > > +BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> > > +BTF_ID_FLAGS(func, bpf_get_task_exe_file,
> > > +            KF_ACQUIRE | KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_RET_NULL)
> > > +BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE | KF_SLEEPABLE)
> >
> > Do we really need KF_SLEEPABLE for bpf_put_file?
>
> Well, the guts of fput() is annotated w/ might_sleep(), so the calling
> thread may presumably be involuntarily put to sleep? You can also see
> the guts of fput() invoking various indirect function calls
> i.e. ->release(), and depending on the implementation of those, they
> could be initiating resource release related actions which
> consequently could result in waiting for some I/O to be done? fput()
> also calls dput() and mntput() and these too can also do a bunch of
> teardown.
>
> Please correct me if I've misunderstood something.

__fput() is annotated with might_sleep(). However, fput() doesn't not
call __fput() directly. Instead, it schedules a worker to call __fput().
Therefore, it is safe to call fput() from a non-sleepable context.

Thanks,
Song
Song Liu July 26, 2024, 11:52 p.m. UTC | #7
On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
>
> Add a new variant of bpf_d_path() named bpf_path_d_path() which takes
> the form of a BPF kfunc and enforces KF_TRUSTED_ARGS semantics onto
> its arguments.
>
> This new d_path() based BPF kfunc variant is intended to address the
> legacy bpf_d_path() BPF helper's susceptibility to memory corruption
> issues [0, 1, 2] by ensuring to only operate on supplied arguments
> which are deemed trusted by the BPF verifier. Typically, this means
> that only pointers to a struct path which have been referenced counted
> may be supplied.
>
> In addition to the new bpf_path_d_path() BPF kfunc, we also add a
> KF_ACQUIRE based BPF kfunc bpf_get_task_exe_file() and KF_RELEASE
> counterpart BPF kfunc bpf_put_file(). This is so that the new
> bpf_path_d_path() BPF kfunc can be used more flexibility from within
> the context of a BPF LSM program. It's rather common to ascertain the
> backing executable file for the calling process by performing the
> following walk current->mm->exe_file while instrumenting a given
> operation from the context of the BPF LSM program. However, walking
> current->mm->exe_file directly is never deemed to be OK, and doing so
> from both inside and outside of BPF LSM program context should be
> considered as a bug. Using bpf_get_task_exe_file() and in turn
> bpf_put_file() will allow BPF LSM programs to reliably get and put
> references to current->mm->exe_file.
>
> As of now, all the newly introduced BPF kfuncs within this patch are
> limited to sleepable BPF LSM program types. Therefore, they may only
> be called when a BPF LSM program is attached to one of the listed
> attachment points defined within the sleepable_lsm_hooks BTF ID set.
>
> [0] https://lore.kernel.org/bpf/CAG48ez0ppjcT=QxU-jtCUfb5xQb3mLr=5FcwddF_VKfEBPs_Dg@mail.gmail.com/
> [1] https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org/
> [2] https://lore.kernel.org/bpf/20220219113744.1852259-1-memxor@gmail.com/
>
> Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>

checkpatch reported a few syntax issues on this one:

https://netdev.bots.linux.dev/static/nipa/874023/13742510/checkpatch/stdout
Matt Bobrowski July 28, 2024, 7:52 p.m. UTC | #8
On Fri, Jul 26, 2024 at 04:52:50PM -0700, Song Liu wrote:
> On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> >
> > Add a new variant of bpf_d_path() named bpf_path_d_path() which takes
> > the form of a BPF kfunc and enforces KF_TRUSTED_ARGS semantics onto
> > its arguments.
> >
> > This new d_path() based BPF kfunc variant is intended to address the
> > legacy bpf_d_path() BPF helper's susceptibility to memory corruption
> > issues [0, 1, 2] by ensuring to only operate on supplied arguments
> > which are deemed trusted by the BPF verifier. Typically, this means
> > that only pointers to a struct path which have been referenced counted
> > may be supplied.
> >
> > In addition to the new bpf_path_d_path() BPF kfunc, we also add a
> > KF_ACQUIRE based BPF kfunc bpf_get_task_exe_file() and KF_RELEASE
> > counterpart BPF kfunc bpf_put_file(). This is so that the new
> > bpf_path_d_path() BPF kfunc can be used more flexibility from within
> > the context of a BPF LSM program. It's rather common to ascertain the
> > backing executable file for the calling process by performing the
> > following walk current->mm->exe_file while instrumenting a given
> > operation from the context of the BPF LSM program. However, walking
> > current->mm->exe_file directly is never deemed to be OK, and doing so
> > from both inside and outside of BPF LSM program context should be
> > considered as a bug. Using bpf_get_task_exe_file() and in turn
> > bpf_put_file() will allow BPF LSM programs to reliably get and put
> > references to current->mm->exe_file.
> >
> > As of now, all the newly introduced BPF kfuncs within this patch are
> > limited to sleepable BPF LSM program types. Therefore, they may only
> > be called when a BPF LSM program is attached to one of the listed
> > attachment points defined within the sleepable_lsm_hooks BTF ID set.
> >
> > [0] https://lore.kernel.org/bpf/CAG48ez0ppjcT=QxU-jtCUfb5xQb3mLr=5FcwddF_VKfEBPs_Dg@mail.gmail.com/
> > [1] https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org/
> > [2] https://lore.kernel.org/bpf/20220219113744.1852259-1-memxor@gmail.com/
> >
> > Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
> 
> checkpatch reported a few syntax issues on this one:
> 
> https://netdev.bots.linux.dev/static/nipa/874023/13742510/checkpatch/stdout

Thanks for making aware, all has been addressed.

/M
Matt Bobrowski July 28, 2024, 8:29 p.m. UTC | #9
On Fri, Jul 26, 2024 at 03:48:45PM -0700, Song Liu wrote:
> On Fri, Jul 26, 2024 at 2:49 PM Matt Bobrowski <mattbobrowski@google.com> wrote:
> >
> > On Fri, Jul 26, 2024 at 02:25:26PM -0700, Song Liu wrote:
> > > On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> > > >
> > > [...]
> > > > +       len = buf + buf__sz - ret;
> > > > +       memmove(buf, ret, len);
> > > > +       return len;
> > > > +}
> > > > +__bpf_kfunc_end_defs();
> > > > +
> > > > +BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
> > > > +BTF_ID_FLAGS(func, bpf_get_task_exe_file,
> > > > +            KF_ACQUIRE | KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_RET_NULL)
> > > > +BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE | KF_SLEEPABLE)
> > >
> > > Do we really need KF_SLEEPABLE for bpf_put_file?
> >
> > Well, the guts of fput() is annotated w/ might_sleep(), so the calling
> > thread may presumably be involuntarily put to sleep? You can also see
> > the guts of fput() invoking various indirect function calls
> > i.e. ->release(), and depending on the implementation of those, they
> > could be initiating resource release related actions which
> > consequently could result in waiting for some I/O to be done? fput()
> > also calls dput() and mntput() and these too can also do a bunch of
> > teardown.
> >
> > Please correct me if I've misunderstood something.
> 
> __fput() is annotated with might_sleep(). However, fput() doesn't not
> call __fput() directly. Instead, it schedules a worker to call __fput().
> Therefore, it is safe to call fput() from a non-sleepable context.

Oh, yes, you're absolutely right. I failed to realize that, so my
apologies. In that case, yes, technically bpf_put_file() does not need
to be annotated w/ KF_SLEEPABLE. Now that I also think of it, one of
the other and only reasons why we made this initially sleepable is
because bpf_put_file() at the time was meant to be used exclusively
within the same context as bpf_path_d_path(), and that is currently
marked as sleepable. Although technically speaking, I think we could
also make bpf_path_d_path() not restricted to only sleepable BPF LSM
program types, and in turn that could mean that
bpf_get_task_exe_file() also doesn't need to be restricted to
sleepable BPF LSM programs.

Alexei, what do you think about relaxing the sleepable annotation
across this entire set of BPF kfuncs? From a technical perspective I
think it's OK, but I'd also like someone like Christian to confirm
that d_path() can't actually end up sleeping. Glancing over it, I
believe this to be true, but I may also be naively missing something.

/M
Matt Bobrowski July 28, 2024, 8:35 p.m. UTC | #10
On Fri, Jul 26, 2024 at 01:43:45PM -0700, Alexei Starovoitov wrote:
> On Fri, Jul 26, 2024 at 1:56 AM Matt Bobrowski <mattbobrowski@google.com> wrote:
> > +
> > +static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
> > +{
> > +       if (!btf_id_set8_contains(&bpf_fs_kfunc_set_ids, kfunc_id) ||
> > +           prog->type == BPF_PROG_TYPE_LSM)
> > +               return 0;
> > +       return -EACCES;
> > +}
> > +
> > +static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
> > +       .owner = THIS_MODULE,
> > +       .set = &bpf_fs_kfunc_set_ids,
> > +       .filter = bpf_fs_kfuncs_filter,
> > +};
> > +
> > +static int __init bpf_fs_kfuncs_init(void)
> > +{
> > +       return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
> > +}
> 
> Aside from buf__sz <= 0 that Christian spotted

I'm going to fix this up in v2 of this patch, so don't worry about it.

> the bpf_fs_kfuncs_filter() is a watery water.
> It's doing a redundant check that is already covered by
> 
> register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM,...
> 
> I'll remove it while applying.

As discussed, this filter is currently required as without it we
inadvertently allow tracing BPF programs to also use these BPF
kfuncs.

/M
Christian Brauner July 29, 2024, 10:56 a.m. UTC | #11
> think it's OK, but I'd also like someone like Christian to confirm
> that d_path() can't actually end up sleeping. Glancing over it, I

We annotated ->d_dname() as non-sleepable in locking.rst so even
->d_dname() shouldn't and curently doesn't sleep. There's a few
codepaths that end up calling d_path() under spinlocks but none of them
should end up calling anything related to ->d_name() and so wouldn't be
affected even if it did sleep.
Matt Bobrowski July 29, 2024, 11:11 a.m. UTC | #12
On Mon, Jul 29, 2024 at 12:56:54PM +0200, Christian Brauner wrote:
> > think it's OK, but I'd also like someone like Christian to confirm
> > that d_path() can't actually end up sleeping. Glancing over it, I
> 
> We annotated ->d_dname() as non-sleepable in locking.rst so even
> ->d_dname() shouldn't and curently doesn't sleep. There's a few
> codepaths that end up calling d_path() under spinlocks but none of them
> should end up calling anything related to ->d_name() and so wouldn't be
> affected even if it did sleep.

Wonderful, exactly what I had also concluded. In that case, I think we
can relax the sleepable requirement across this suite of BPF
kfuncs. Does anyone object?

/M
diff mbox series

Patch

diff --git a/fs/Makefile b/fs/Makefile
index 6ecc9b0a53f2..61679fd587b7 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,3 +129,4 @@  obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
+obj-$(CONFIG_BPF_LSM)		+= bpf_fs_kfuncs.o
diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
new file mode 100644
index 000000000000..3813e2a83313
--- /dev/null
+++ b/fs/bpf_fs_kfuncs.c
@@ -0,0 +1,133 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC. */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/path.h>
+#include <linux/sched.h>
+
+__bpf_kfunc_start_defs();
+/**
+ * bpf_get_task_exe_file - get a reference on the exe_file struct file member of
+ *                         the mm_struct that is nested within the supplied
+ *                         task_struct
+ * @task: task_struct of which the nested mm_struct exe_file member to get a
+ * reference on
+ *
+ * Get a reference on the exe_file struct file member field of the mm_struct
+ * nested within the supplied *task*. The referenced file pointer acquired by
+ * this BPF kfunc must be released using bpf_put_file(). Failing to call
+ * bpf_put_file() on the returned referenced struct file pointer that has been
+ * acquired by this BPF kfunc will result in the BPF program being rejected by
+ * the BPF verifier.
+ *
+ * This BPF kfunc may only be called from sleepable BPF LSM programs.
+ *
+ * Internally, this BPF kfunc leans on get_task_exe_file(), such that calling
+ * bpf_get_task_exe_file() would be analogous to calling get_task_exe_file()
+ * directly in kernel context.
+ *
+ * Return: A referenced struct file pointer to the exe_file member of the
+ * mm_struct that is nested within the supplied *task*. On error, NULL is
+ * returned.
+ */
+__bpf_kfunc struct file *bpf_get_task_exe_file(struct task_struct *task)
+{
+	return get_task_exe_file(task);
+}
+
+/**
+ * bpf_put_file - put a reference on the supplied file
+ * @file: file to put a reference on
+ *
+ * Put a reference on the supplied *file*. Only referenced file pointers may be
+ * passed to this BPF kfunc. Attempting to pass an unreferenced file pointer, or
+ * any other arbitrary pointer for that matter, will result in the BPF program
+ * being rejected by the BPF verifier.
+ *
+ * This BPF kfunc may only be called from sleepable BPF LSM programs. Though
+ * fput() can be called from IRQ context, we're enforcing sleepability here.
+ */
+__bpf_kfunc void bpf_put_file(struct file *file)
+{
+	fput(file);
+}
+
+/**
+ * bpf_path_d_path - resolve the pathname for the supplied path
+ * @path: path to resolve the pathname for
+ * @buf: buffer to return the resolved pathname in
+ * @buf__sz: length of the supplied buffer
+ *
+ * Resolve the pathname for the supplied *path* and store it in *buf*. This BPF
+ * kfunc is the safer variant of the legacy bpf_d_path() helper and should be
+ * used in place of bpf_d_path() whenever possible. It enforces KF_TRUSTED_ARGS
+ * semantics, meaning that the supplied *path* must itself hold a valid
+ * reference, or else the BPF program will be outright rejected by the BPF
+ * verifier.
+ *
+ * This BPF kfunc may only be called from sleepable BPF LSM programs.
+ *
+ * Return: A positive integer corresponding to the length of the resolved
+ * pathname in *buf*, including the NUL termination character. On error, a
+ * negative integer is returned.
+ */
+__bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
+{
+	int len;
+	char *ret;
+
+	if (buf__sz <= 0)
+		return -EINVAL;
+
+	/* Usually, d_path() will never involuntarily put the calling thread to
+        * sleep. However, there could be exceptions to this as d_op->d_dname()
+        * has free rein over what it wants to do. Additionally, given that this
+        * new d_path() based BPF kfunc enforces KF_TRUSTED_ARGS, it'll likely
+        * only ever be called alongside or in similar contexts, to other
+        * supporting BPF kfuncs that may end up being put to sleep.
+        */
+	ret = d_path(path, buf, buf__sz);
+	if (IS_ERR(ret))
+		return PTR_ERR(ret);
+
+	len = buf + buf__sz - ret;
+	memmove(buf, ret, len);
+	return len;
+}
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
+BTF_ID_FLAGS(func, bpf_get_task_exe_file,
+	     KF_ACQUIRE | KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
+
+static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	if (!btf_id_set8_contains(&bpf_fs_kfunc_set_ids, kfunc_id) ||
+	    prog->type == BPF_PROG_TYPE_LSM)
+		return 0;
+	return -EACCES;
+}
+
+static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set = &bpf_fs_kfunc_set_ids,
+	.filter = bpf_fs_kfuncs_filter,
+};
+
+static int __init bpf_fs_kfuncs_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
+}
+
+late_initcall(bpf_fs_kfuncs_init);