diff mbox series

[bpf-next,v9,1/5] bpf: Introduce task_file open-coded iterator kfuncs

Message ID AM6PR03MB50802EA81C89D22791CCF09099EC2@AM6PR03MB5080.eurprd03.prod.outlook.com (mailing list archive)
State New
Delegated to: BPF
Headers show
Series bpf: Add open-coded style process file iterator and bpf_fget_task() kfunc | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 21 this patch: 21
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 66 this patch: 69
netdev/checkpatch warning WARNING: line length of 95 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 11 this patch: 13
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Juntong Deng Jan. 27, 2025, 11:46 p.m. UTC
This patch adds the open-coded iterator style process file iterator
kfuncs bpf_iter_task_file_{new,next,destroy} that iterates over all
files opened by the specified process.

bpf_iter_task_file_next returns a pointer to bpf_iter_task_file_item,
which currently contains *task, *file, fd. This is an extensible
structure that enables compatibility with different versions
through CO-RE.

The reference to struct file acquired by the previous
bpf_iter_task_file_next() is released in the next
bpf_iter_task_file_next(), and the last reference is released in the
last bpf_iter_task_file_next() that returns NULL.

In the bpf_iter_task_file_destroy(), if the iterator does not iterate to
the end, then the last struct file reference is released at this time.

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
---
 kernel/bpf/helpers.c   |  3 ++
 kernel/bpf/task_iter.c | 90 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)

Comments

Alexei Starovoitov Jan. 30, 2025, 2:35 a.m. UTC | #1
On Mon, Jan 27, 2025 at 3:48 PM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> This patch adds the open-coded iterator style process file iterator
> kfuncs bpf_iter_task_file_{new,next,destroy} that iterates over all
> files opened by the specified process.
>
> bpf_iter_task_file_next returns a pointer to bpf_iter_task_file_item,
> which currently contains *task, *file, fd. This is an extensible
> structure that enables compatibility with different versions
> through CO-RE.
>
> The reference to struct file acquired by the previous
> bpf_iter_task_file_next() is released in the next
> bpf_iter_task_file_next(), and the last reference is released in the
> last bpf_iter_task_file_next() that returns NULL.
>
> In the bpf_iter_task_file_destroy(), if the iterator does not iterate to
> the end, then the last struct file reference is released at this time.
>
> Signed-off-by: Juntong Deng <juntong.deng@outlook.com>

All patches look fine,
but we need an ack from Christian for patches 3 and 4 to proceed.
Christian Brauner Jan. 30, 2025, 4:04 p.m. UTC | #2
On Mon, Jan 27, 2025 at 11:46:50PM +0000, Juntong Deng wrote:
> This patch adds the open-coded iterator style process file iterator
> kfuncs bpf_iter_task_file_{new,next,destroy} that iterates over all
> files opened by the specified process.
> 
> bpf_iter_task_file_next returns a pointer to bpf_iter_task_file_item,
> which currently contains *task, *file, fd. This is an extensible
> structure that enables compatibility with different versions
> through CO-RE.
> 
> The reference to struct file acquired by the previous
> bpf_iter_task_file_next() is released in the next
> bpf_iter_task_file_next(), and the last reference is released in the
> last bpf_iter_task_file_next() that returns NULL.
> 
> In the bpf_iter_task_file_destroy(), if the iterator does not iterate to
> the end, then the last struct file reference is released at this time.
> 
> Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
> ---

I deeply dislike that this allows bpf programs to iterate through
another tasks files more than what is already possible with the
task_file_seq_* bpf api.

This here means that bpf programs have access to all file types that
exist in the kernel. From general simple filesystem files to pidfds, kvm
fd, epoll fd, drm fds - anything you can think of. And then do arbitrary
and ever expanding stuff with those files from the iterator with less
restrictions (if I'm reading this right) than the task_file_seq_*
iterator.

Possibly even keeping that reference for a long time leading to weird
EBUSY issues for filesystem shutdown and similar problems.

This is a bad idea. Even in the kernel we only allow this type of
iteration for procfs and procfs-like usage and there we hold references
to files from another task for a very short time when we e.g., access
/proc/<PID>/fd/.

And you already have an iterator for that with task_file_seq_get_*()
even if it is more work.

I'm also not at all swayed by the fact that this is coming out of an
effort to move CRIU into bpf just to make things easier. Not a selling
point as we do have CRIU and I don't think we need to now put more CRIU
related stuff into the kernel.

So this will not get an ACK from me. I'm putting Al and Linus here as
well as they might have opinions on this and might disagree with me.

>  kernel/bpf/helpers.c   |  3 ++
>  kernel/bpf/task_iter.c | 90 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 93 insertions(+)
> 
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index f27ce162427a..359c5bbf4814 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3157,6 +3157,9 @@ BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
>  BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
>  BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
> +BTF_ID_FLAGS(func, bpf_iter_task_file_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_iter_task_file_next, KF_ITER_NEXT | KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_iter_task_file_destroy, KF_ITER_DESTROY)
>  BTF_ID_FLAGS(func, bpf_dynptr_adjust)
>  BTF_ID_FLAGS(func, bpf_dynptr_is_null)
>  BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 98d9b4c0daff..24a5af67e6c8 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -1027,6 +1027,96 @@ __bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)
>  {
>  }
>  
> +struct bpf_iter_task_file_item {
> +	struct task_struct *task;
> +	struct file *file;
> +	unsigned int fd;
> +};
> +
> +struct bpf_iter_task_file {
> +	__u64 __opaque[4];
> +} __aligned(8);
> +
> +struct bpf_iter_task_file_kern {
> +	struct bpf_iter_task_file_item item;
> +	unsigned int next_fd;
> +} __aligned(8);
> +
> +/**
> + * bpf_iter_task_file_new() - Initialize a new task file iterator for a task,
> + * used to iterate over all files opened by a specified task
> + *
> + * @it: the new bpf_iter_task_file to be created
> + * @task: a pointer pointing to the task to be iterated over
> + */
> +__bpf_kfunc int bpf_iter_task_file_new(struct bpf_iter_task_file *it, struct task_struct *task)
> +{
> +	struct bpf_iter_task_file_kern *kit = (void *)it;
> +	struct bpf_iter_task_file_item *item = &kit->item;
> +
> +	BUILD_BUG_ON(sizeof(struct bpf_iter_task_file_kern) > sizeof(struct bpf_iter_task_file));
> +	BUILD_BUG_ON(__alignof__(struct bpf_iter_task_file_kern) !=
> +		     __alignof__(struct bpf_iter_task_file));
> +
> +	item->task = get_task_struct(task);
> +	item->file = NULL;
> +	item->fd = 0;
> +	kit->next_fd = 0;
> +
> +	return 0;
> +}
> +
> +/**
> + * bpf_iter_task_file_next() - Get the next file in bpf_iter_task_file
> + *
> + * bpf_iter_task_file_next acquires a reference to the struct file.
> + *
> + * The reference to struct file acquired by the previous
> + * bpf_iter_task_file_next() is released in the next bpf_iter_task_file_next(),
> + * and the last reference is released in the last bpf_iter_task_file_next()
> + * that returns NULL.
> + *
> + * @it: the bpf_iter_task_file to be checked
> + *
> + * @returns a pointer to bpf_iter_task_file_item
> + */
> +__bpf_kfunc struct bpf_iter_task_file_item *bpf_iter_task_file_next(struct bpf_iter_task_file *it)
> +{
> +	struct bpf_iter_task_file_kern *kit = (void *)it;
> +	struct bpf_iter_task_file_item *item = &kit->item;
> +
> +	if (item->file)
> +		fput(item->file);
> +
> +	item->file = fget_task_next(item->task, &kit->next_fd);
> +	if (!item->file)
> +		return NULL;
> +
> +	item->fd = kit->next_fd;
> +	kit->next_fd++;
> +
> +	return item;
> +}
> +
> +/**
> + * bpf_iter_task_file_destroy() - Destroy a bpf_iter_task_file
> + *
> + * If the iterator does not iterate to the end, then the last
> + * struct file reference is released at this time.
> + *
> + * @it: the bpf_iter_task_file to be destroyed
> + */
> +__bpf_kfunc void bpf_iter_task_file_destroy(struct bpf_iter_task_file *it)
> +{
> +	struct bpf_iter_task_file_kern *kit = (void *)it;
> +	struct bpf_iter_task_file_item *item = &kit->item;
> +
> +	if (item->file)
> +		fput(item->file);
> +
> +	put_task_struct(item->task);
> +}
> +
>  __bpf_kfunc_end_defs();
>  
>  DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
> -- 
> 2.39.5
>
Linus Torvalds Jan. 30, 2025, 4:35 p.m. UTC | #3
On Thu, 30 Jan 2025 at 08:04, Christian Brauner <brauner@kernel.org> wrote:
>
> I deeply dislike that this allows bpf programs to iterate through
> another tasks files more than what is already possible with the
> task_file_seq_* bpf api.

Ack. This needs to just die.

There is no excuse for this, and no, CRIU is absolutely *not* that excuse.

In fact, CRIU is a huge red flag, and has caused endless issues before.

We should absolutely not add special BPF interfaces for CRIU. It only
leads to pain and misery.

           Linus
diff mbox series

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f27ce162427a..359c5bbf4814 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3157,6 +3157,9 @@  BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
 BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_task_file_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_task_file_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_task_file_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_dynptr_adjust)
 BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 98d9b4c0daff..24a5af67e6c8 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -1027,6 +1027,96 @@  __bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)
 {
 }
 
+struct bpf_iter_task_file_item {
+	struct task_struct *task;
+	struct file *file;
+	unsigned int fd;
+};
+
+struct bpf_iter_task_file {
+	__u64 __opaque[4];
+} __aligned(8);
+
+struct bpf_iter_task_file_kern {
+	struct bpf_iter_task_file_item item;
+	unsigned int next_fd;
+} __aligned(8);
+
+/**
+ * bpf_iter_task_file_new() - Initialize a new task file iterator for a task,
+ * used to iterate over all files opened by a specified task
+ *
+ * @it: the new bpf_iter_task_file to be created
+ * @task: a pointer pointing to the task to be iterated over
+ */
+__bpf_kfunc int bpf_iter_task_file_new(struct bpf_iter_task_file *it, struct task_struct *task)
+{
+	struct bpf_iter_task_file_kern *kit = (void *)it;
+	struct bpf_iter_task_file_item *item = &kit->item;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_task_file_kern) > sizeof(struct bpf_iter_task_file));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_task_file_kern) !=
+		     __alignof__(struct bpf_iter_task_file));
+
+	item->task = get_task_struct(task);
+	item->file = NULL;
+	item->fd = 0;
+	kit->next_fd = 0;
+
+	return 0;
+}
+
+/**
+ * bpf_iter_task_file_next() - Get the next file in bpf_iter_task_file
+ *
+ * bpf_iter_task_file_next acquires a reference to the struct file.
+ *
+ * The reference to struct file acquired by the previous
+ * bpf_iter_task_file_next() is released in the next bpf_iter_task_file_next(),
+ * and the last reference is released in the last bpf_iter_task_file_next()
+ * that returns NULL.
+ *
+ * @it: the bpf_iter_task_file to be checked
+ *
+ * @returns a pointer to bpf_iter_task_file_item
+ */
+__bpf_kfunc struct bpf_iter_task_file_item *bpf_iter_task_file_next(struct bpf_iter_task_file *it)
+{
+	struct bpf_iter_task_file_kern *kit = (void *)it;
+	struct bpf_iter_task_file_item *item = &kit->item;
+
+	if (item->file)
+		fput(item->file);
+
+	item->file = fget_task_next(item->task, &kit->next_fd);
+	if (!item->file)
+		return NULL;
+
+	item->fd = kit->next_fd;
+	kit->next_fd++;
+
+	return item;
+}
+
+/**
+ * bpf_iter_task_file_destroy() - Destroy a bpf_iter_task_file
+ *
+ * If the iterator does not iterate to the end, then the last
+ * struct file reference is released at this time.
+ *
+ * @it: the bpf_iter_task_file to be destroyed
+ */
+__bpf_kfunc void bpf_iter_task_file_destroy(struct bpf_iter_task_file *it)
+{
+	struct bpf_iter_task_file_kern *kit = (void *)it;
+	struct bpf_iter_task_file_item *item = &kit->item;
+
+	if (item->file)
+		fput(item->file);
+
+	put_task_struct(item->task);
+}
+
 __bpf_kfunc_end_defs();
 
 DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);