diff mbox series

[v3,bpf-next,3/3] selftests/bpf: Add selftests for cpumask iter

Message ID 20240117024823.4186-4-laoar.shao@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Add bpf_iter_cpumask | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning CHECK: Comparison to NULL could be written "!cgrp" WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: externs should be avoided in .c files WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Yafang Shao Jan. 17, 2024, 2:48 a.m. UTC
Within the BPF program, we leverage the cgroup iterator to iterate through
percpu runqueue data, specifically the 'nr_running' metric. Subsequently
 we expose this data to userspace by means of a sequence file.

The CPU affinity for the cpumask is determined by the PID of a task:

- PID of the init task (PID 1)
  We typically don't set CPU affinity for init task and thus we can iterate
  across all possible CPUs. However, in scenarios where you've set CPU
  affinity for the init task, you should set the cpumask of your current
  task to full-F. Then proceed to iterate through all possible CPUs using
  the current task.
- PID of a task with defined CPU affinity
  The aim here is to iterate through a specific cpumask. This scenario
  aligns with tasks residing within a cpuset cgroup.
- Invalid PID (e.g., PID -1)
  No cpumask is available in this case.

The result is as follows:
  #65/1    cpumask_iter/init_pid:OK
  #65/2    cpumask_iter/invalid_pid:OK
  #65/3    cpumask_iter/self_pid_one_cpu:OK
  #65/4    cpumask_iter/self_pid_multi_cpus:OK
  #65      cpumask_iter:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

CONFIG_PSI=y is required for this testcase.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 tools/testing/selftests/bpf/config            |   1 +
 .../selftests/bpf/prog_tests/cpumask_iter.c   | 134 ++++++++++++++++++
 .../selftests/bpf/progs/cpumask_common.h      |   3 +
 .../selftests/bpf/progs/test_cpumask_iter.c   |  56 ++++++++
 4 files changed, 194 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_cpumask_iter.c

Comments

Yonghong Song Jan. 18, 2024, 11:46 p.m. UTC | #1
On 1/16/24 6:48 PM, Yafang Shao wrote:
> Within the BPF program, we leverage the cgroup iterator to iterate through
> percpu runqueue data, specifically the 'nr_running' metric. Subsequently
>   we expose this data to userspace by means of a sequence file.
>
> The CPU affinity for the cpumask is determined by the PID of a task:
>
> - PID of the init task (PID 1)
>    We typically don't set CPU affinity for init task and thus we can iterate
>    across all possible CPUs. However, in scenarios where you've set CPU
>    affinity for the init task, you should set the cpumask of your current
>    task to full-F. Then proceed to iterate through all possible CPUs using

What is full-F? It would be good if you can clarify in the commit message.

>    the current task.
> - PID of a task with defined CPU affinity
>    The aim here is to iterate through a specific cpumask. This scenario
>    aligns with tasks residing within a cpuset cgroup.
> - Invalid PID (e.g., PID -1)
>    No cpumask is available in this case.
>
> The result as follows,
>    #65/1    cpumask_iter/init_pid:OK
>    #65/2    cpumask_iter/invalid_pid:OK
>    #65/3    cpumask_iter/self_pid_one_cpu:OK
>    #65/4    cpumask_iter/self_pid_multi_cpus:OK
>    #65      cpumask_iter:OK
>    Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED
>
> CONFIG_PSI=y is required for this testcase.
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>   tools/testing/selftests/bpf/config            |   1 +
>   .../selftests/bpf/prog_tests/cpumask_iter.c   | 134 ++++++++++++++++++
>   .../selftests/bpf/progs/cpumask_common.h      |   3 +
>   .../selftests/bpf/progs/test_cpumask_iter.c   |  56 ++++++++
>   4 files changed, 194 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
>   create mode 100644 tools/testing/selftests/bpf/progs/test_cpumask_iter.c
>
> diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> index c125c441abc7..9c42568ed376 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -78,6 +78,7 @@ CONFIG_NF_CONNTRACK_MARK=y
>   CONFIG_NF_DEFRAG_IPV4=y
>   CONFIG_NF_DEFRAG_IPV6=y
>   CONFIG_NF_NAT=y
> +CONFIG_PSI=y
>   CONFIG_RC_CORE=y
>   CONFIG_SECURITY=y
>   CONFIG_SECURITYFS=y
> diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
> new file mode 100644
> index 000000000000..984d01d09d79
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
> +
> +#define _GNU_SOURCE
> +#include <sched.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +
> +#include <test_progs.h>
> +#include "cgroup_helpers.h"
> +#include "test_cpumask_iter.skel.h"
> +
> +static void verify_percpu_data(struct bpf_link *link, int nr_cpu_exp, int nr_running_exp)
> +{
> +	int iter_fd, len, item, nr_running, psi_running, nr_cpus;
> +	static char buf[128];

why static?

> +	size_t left;
> +	char *p;
> +
> +	iter_fd = bpf_iter_create(bpf_link__fd(link));
> +	if (!ASSERT_GE(iter_fd, 0, "iter_fd"))
> +		return;
> +
> +	memset(buf, 0, sizeof(buf));
> +	left = ARRAY_SIZE(buf);
> +	p = buf;
> +	while ((len = read(iter_fd, p, left)) > 0) {
> +		p += len;
> +		left -= len;
> +	}
> +
> +	item = sscanf(buf, "nr_running %u nr_cpus %u psi_running %u\n",
> +		      &nr_running, &nr_cpus, &psi_running);
> +	if (nr_cpu_exp == -1) {
> +		ASSERT_EQ(item, -1, "seq_format");
> +		goto out;
> +	}
> +
> +	ASSERT_EQ(item, 3, "seq_format");
> +	ASSERT_GE(nr_running, nr_running_exp, "nr_running");
> +	ASSERT_GE(psi_running, nr_running_exp, "psi_running");
> +	ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus");
> +
> +	/* read() after iter finishes should be ok. */
> +	if (len == 0)
> +		ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");

The above 'if' statement is irrelevant to the main purpose of this test
and can be removed.

> +
> +out:
> +	close(iter_fd);
> +}
> +
> +void test_cpumask_iter(void)
> +{
> +	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> +	int nr_possible, cgrp_fd, pid, err, cnt, i;
> +	struct test_cpumask_iter *skel = NULL;

= NULL is not needed.

> +	union bpf_iter_link_info linfo;
> +	int cpu_ids[] = {1, 3, 4, 5};
> +	struct bpf_link *link;
> +	cpu_set_t set;
> +
> +	skel = test_cpumask_iter__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "test_for_each_cpu__open_and_load"))
> +		return;
> +
> +	if (setup_cgroup_environment())
> +		goto destroy;
> +
> +	/* Utilize the cgroup iter */
> +	cgrp_fd = get_root_cgroup();
> +	if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
> +		goto cleanup;
> +
> +	memset(&linfo, 0, sizeof(linfo));
> +	linfo.cgroup.cgroup_fd = cgrp_fd;
> +	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
> +	opts.link_info = &linfo;
> +	opts.link_info_len = sizeof(linfo);
> +
> +	link = bpf_program__attach_iter(skel->progs.cpu_cgroup, &opts);
> +	if (!ASSERT_OK_PTR(link, "attach_iter"))
> +		goto close_fd;
> +
> +	skel->bss->target_pid = 1;
> +	/* In case init task is set CPU affinity */
> +	err = sched_getaffinity(1, sizeof(set), &set);
> +	if (!ASSERT_OK(err, "setaffinity"))
> +		goto close_fd;

goto free_link.

> +
> +	cnt = CPU_COUNT(&set);
> +	nr_possible = bpf_num_possible_cpus();
> +	if (test__start_subtest("init_pid"))
> +		/* curent task is running. */
> +		verify_percpu_data(link, cnt, cnt == nr_possible ? 1 : 0);
[...]
Yafang Shao Jan. 21, 2024, 2:45 a.m. UTC | #2
On Fri, Jan 19, 2024 at 7:46 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> On 1/16/24 6:48 PM, Yafang Shao wrote:
> > Within the BPF program, we leverage the cgroup iterator to iterate through
> > percpu runqueue data, specifically the 'nr_running' metric. Subsequently
> >   we expose this data to userspace by means of a sequence file.
> >
> > The CPU affinity for the cpumask is determined by the PID of a task:
> >
> > - PID of the init task (PID 1)
> >    We typically don't set CPU affinity for init task and thus we can iterate
> >    across all possible CPUs. However, in scenarios where you've set CPU
> >    affinity for the init task, you should set the cpumask of your current
> >    task to full-F. Then proceed to iterate through all possible CPUs using
>
> What is full-F? It would be good if you can clarify in the commit message.

I mean set all available CPUs for the task.
Will clarify it in the next version.

>
> >    the current task.
> > - PID of a task with defined CPU affinity
> >    The aim here is to iterate through a specific cpumask. This scenario
> >    aligns with tasks residing within a cpuset cgroup.
> > - Invalid PID (e.g., PID -1)
> >    No cpumask is available in this case.
> >
> > The result as follows,
> >    #65/1    cpumask_iter/init_pid:OK
> >    #65/2    cpumask_iter/invalid_pid:OK
> >    #65/3    cpumask_iter/self_pid_one_cpu:OK
> >    #65/4    cpumask_iter/self_pid_multi_cpus:OK
> >    #65      cpumask_iter:OK
> >    Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED
> >
> > CONFIG_PSI=y is required for this testcase.
> >
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >   tools/testing/selftests/bpf/config            |   1 +
> >   .../selftests/bpf/prog_tests/cpumask_iter.c   | 134 ++++++++++++++++++
> >   .../selftests/bpf/progs/cpumask_common.h      |   3 +
> >   .../selftests/bpf/progs/test_cpumask_iter.c   |  56 ++++++++
> >   4 files changed, 194 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
> >   create mode 100644 tools/testing/selftests/bpf/progs/test_cpumask_iter.c
> >
> > diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> > index c125c441abc7..9c42568ed376 100644
> > --- a/tools/testing/selftests/bpf/config
> > +++ b/tools/testing/selftests/bpf/config
> > @@ -78,6 +78,7 @@ CONFIG_NF_CONNTRACK_MARK=y
> >   CONFIG_NF_DEFRAG_IPV4=y
> >   CONFIG_NF_DEFRAG_IPV6=y
> >   CONFIG_NF_NAT=y
> > +CONFIG_PSI=y
> >   CONFIG_RC_CORE=y
> >   CONFIG_SECURITY=y
> >   CONFIG_SECURITYFS=y
> > diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
> > new file mode 100644
> > index 000000000000..984d01d09d79
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
> > @@ -0,0 +1,134 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
> > +
> > +#define _GNU_SOURCE
> > +#include <sched.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include <test_progs.h>
> > +#include "cgroup_helpers.h"
> > +#include "test_cpumask_iter.skel.h"
> > +
> > +static void verify_percpu_data(struct bpf_link *link, int nr_cpu_exp, int nr_running_exp)
> > +{
> > +     int iter_fd, len, item, nr_running, psi_running, nr_cpus;
> > +     static char buf[128];
>
> why static?

Will remove it.

>
> > +     size_t left;
> > +     char *p;
> > +
> > +     iter_fd = bpf_iter_create(bpf_link__fd(link));
> > +     if (!ASSERT_GE(iter_fd, 0, "iter_fd"))
> > +             return;
> > +
> > +     memset(buf, 0, sizeof(buf));
> > +     left = ARRAY_SIZE(buf);
> > +     p = buf;
> > +     while ((len = read(iter_fd, p, left)) > 0) {
> > +             p += len;
> > +             left -= len;
> > +     }
> > +
> > +     item = sscanf(buf, "nr_running %u nr_cpus %u psi_running %u\n",
> > +                   &nr_running, &nr_cpus, &psi_running);
> > +     if (nr_cpu_exp == -1) {
> > +             ASSERT_EQ(item, -1, "seq_format");
> > +             goto out;
> > +     }
> > +
> > +     ASSERT_EQ(item, 3, "seq_format");
> > +     ASSERT_GE(nr_running, nr_running_exp, "nr_running");
> > +     ASSERT_GE(psi_running, nr_running_exp, "psi_running");
> > +     ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus");
> > +
> > +     /* read() after iter finishes should be ok. */
> > +     if (len == 0)
> > +             ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
>
> The above 'if' statement is irrelevant to the main purpose of this test
> and can be removed.

Will remove it.

>
> > +
> > +out:
> > +     close(iter_fd);
> > +}
> > +
> > +void test_cpumask_iter(void)
> > +{
> > +     DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> > +     int nr_possible, cgrp_fd, pid, err, cnt, i;
> > +     struct test_cpumask_iter *skel = NULL;
>
> = NULL is not needed.

Will change it.

>
> > +     union bpf_iter_link_info linfo;
> > +     int cpu_ids[] = {1, 3, 4, 5};
> > +     struct bpf_link *link;
> > +     cpu_set_t set;
> > +
> > +     skel = test_cpumask_iter__open_and_load();
> > +     if (!ASSERT_OK_PTR(skel, "test_for_each_cpu__open_and_load"))
> > +             return;
> > +
> > +     if (setup_cgroup_environment())
> > +             goto destroy;
> > +
> > +     /* Utilize the cgroup iter */
> > +     cgrp_fd = get_root_cgroup();
> > +     if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
> > +             goto cleanup;
> > +
> > +     memset(&linfo, 0, sizeof(linfo));
> > +     linfo.cgroup.cgroup_fd = cgrp_fd;
> > +     linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
> > +     opts.link_info = &linfo;
> > +     opts.link_info_len = sizeof(linfo);
> > +
> > +     link = bpf_program__attach_iter(skel->progs.cpu_cgroup, &opts);
> > +     if (!ASSERT_OK_PTR(link, "attach_iter"))
> > +             goto close_fd;
> > +
> > +     skel->bss->target_pid = 1;
> > +     /* In case init task is set CPU affinity */
> > +     err = sched_getaffinity(1, sizeof(set), &set);
> > +     if (!ASSERT_OK(err, "setaffinity"))
> > +             goto close_fd;
>
> goto free_link.

Nice catch. will change it.

>
> > +
> > +     cnt = CPU_COUNT(&set);
> > +     nr_possible = bpf_num_possible_cpus();
> > +     if (test__start_subtest("init_pid"))
> > +             /* curent task is running. */
> > +             verify_percpu_data(link, cnt, cnt == nr_possible ? 1 : 0);
> [...]
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c125c441abc7..9c42568ed376 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -78,6 +78,7 @@  CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_NAT=y
+CONFIG_PSI=y
 CONFIG_RC_CORE=y
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
new file mode 100644
index 000000000000..984d01d09d79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
@@ -0,0 +1,134 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cpumask_iter.skel.h"
+
+static void verify_percpu_data(struct bpf_link *link, int nr_cpu_exp, int nr_running_exp)
+{
+	int iter_fd, len, item, nr_running, psi_running, nr_cpus;
+	static char buf[128];
+	size_t left;
+	char *p;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_GE(iter_fd, 0, "iter_fd"))
+		return;
+
+	memset(buf, 0, sizeof(buf));
+	left = ARRAY_SIZE(buf);
+	p = buf;
+	while ((len = read(iter_fd, p, left)) > 0) {
+		p += len;
+		left -= len;
+	}
+
+	item = sscanf(buf, "nr_running %u nr_cpus %u psi_running %u\n",
+		      &nr_running, &nr_cpus, &psi_running);
+	if (nr_cpu_exp == -1) {
+		ASSERT_EQ(item, -1, "seq_format");
+		goto out;
+	}
+
+	ASSERT_EQ(item, 3, "seq_format");
+	ASSERT_GE(nr_running, nr_running_exp, "nr_running");
+	ASSERT_GE(psi_running, nr_running_exp, "psi_running");
+	ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus");
+
+	/* read() after iter finishes should be ok. */
+	if (len == 0)
+		ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+out:
+	close(iter_fd);
+}
+
+void test_cpumask_iter(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	int nr_possible, cgrp_fd, pid, err, cnt, i;
+	struct test_cpumask_iter *skel = NULL;
+	union bpf_iter_link_info linfo;
+	int cpu_ids[] = {1, 3, 4, 5};
+	struct bpf_link *link;
+	cpu_set_t set;
+
+	skel = test_cpumask_iter__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_for_each_cpu__open_and_load"))
+		return;
+
+	if (setup_cgroup_environment())
+		goto destroy;
+
+	/* Utilize the cgroup iter */
+	cgrp_fd = get_root_cgroup();
+	if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
+		goto cleanup;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = cgrp_fd;
+	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(skel->progs.cpu_cgroup, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		goto close_fd;
+
+	skel->bss->target_pid = 1;
+	/* In case the init task has had its CPU affinity set */
+	err = sched_getaffinity(1, sizeof(set), &set);
+	if (!ASSERT_OK(err, "setaffinity"))
+		goto close_fd;
+
+	cnt = CPU_COUNT(&set);
+	nr_possible = bpf_num_possible_cpus();
+	if (test__start_subtest("init_pid"))
+		/* current task is running. */
+		verify_percpu_data(link, cnt, cnt == nr_possible ? 1 : 0);
+
+	skel->bss->target_pid = -1;
+	if (test__start_subtest("invalid_pid"))
+		verify_percpu_data(link, -1, -1);
+
+	pid = getpid();
+	skel->bss->target_pid = pid;
+	CPU_ZERO(&set);
+	CPU_SET(0, &set);
+	err = sched_setaffinity(pid, sizeof(set), &set);
+	if (!ASSERT_OK(err, "setaffinity"))
+		goto free_link;
+
+	if (test__start_subtest("self_pid_one_cpu"))
+		verify_percpu_data(link, 1, 1);
+
+	/* Assume there are at least 8 CPUs on the testbed */
+	if (nr_possible < 8)
+		goto free_link;
+
+	CPU_ZERO(&set);
+	/* Set the CPU affinity: 1,3-5 */
+	for (i = 0; i < ARRAY_SIZE(cpu_ids); i++)
+		CPU_SET(cpu_ids[i], &set);
+	err = sched_setaffinity(pid, sizeof(set), &set);
+	if (!ASSERT_OK(err, "setaffinity"))
+		goto free_link;
+
+	if (test__start_subtest("self_pid_multi_cpus"))
+		verify_percpu_data(link, ARRAY_SIZE(cpu_ids), 1);
+
+free_link:
+	bpf_link__destroy(link);
+close_fd:
+	close(cgrp_fd);
+cleanup:
+	cleanup_cgroup_environment();
+destroy:
+	test_cpumask_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 0cd4aebb97cf..cdb9dc95e9d9 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -55,6 +55,9 @@  void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym
 u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym;
 u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym;
 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
+int bpf_iter_cpumask_new(struct bpf_iter_cpumask *it, const struct cpumask *mask) __ksym;
+int *bpf_iter_cpumask_next(struct bpf_iter_cpumask *it) __ksym;
+void bpf_iter_cpumask_destroy(struct bpf_iter_cpumask *it) __ksym;
 
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/test_cpumask_iter.c b/tools/testing/selftests/bpf/progs/test_cpumask_iter.c
new file mode 100644
index 000000000000..cb8b8359516b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cpumask_iter.c
@@ -0,0 +1,56 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "task_kfunc_common.h"
+#include "cpumask_common.h"
+
+extern const struct psi_group_cpu system_group_pcpu __ksym __weak;
+extern const struct rq runqueues __ksym __weak;
+
+int target_pid;
+
+SEC("iter.s/cgroup")
+int BPF_PROG(cpu_cgroup, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	u32 nr_running = 0, psi_nr_running = 0, nr_cpus = 0;
+	struct psi_group_cpu *groupc;
+	struct task_struct *p;
+	struct rq *rq;
+	int *cpu;
+
+	/* epilogue */
+	if (cgrp == NULL)
+		return 0;
+
+	bpf_rcu_read_lock();
+	p = bpf_task_from_pid(target_pid);
+	if (!p) {
+		bpf_rcu_read_unlock();
+		return 1;
+	}
+
+	bpf_for_each(cpumask, cpu, p->cpus_ptr) {
+		rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, *cpu);
+		if (!rq)
+			continue;
+		nr_running += rq->nr_running;
+		nr_cpus += 1;
+
+		groupc = (struct psi_group_cpu *)bpf_per_cpu_ptr(&system_group_pcpu, *cpu);
+		if (!groupc)
+			continue;
+		psi_nr_running += groupc->tasks[NR_RUNNING];
+	}
+	BPF_SEQ_PRINTF(meta->seq, "nr_running %u nr_cpus %u psi_running %u\n",
+		       nr_running, nr_cpus, psi_nr_running);
+
+	bpf_task_release(p);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";