diff mbox series

[bpf-next,v2,4/4] selftests/bpf: Test concurrent updates on bpf_task_storage_busy

Message ID 20220901061938.3789460-5-houtao@huaweicloud.com (mailing list archive)
State Accepted
Commit 73b97bc78b32eb739a7dd3394fa3981e8021c0ef
Delegated to: BPF
Headers show
Series Use this_cpu_xxx for preemption-safety | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with llvm-16
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 6 maintainers not CCed: shuah@kernel.org linux-kselftest@vger.kernel.org song@kernel.org brauner@kernel.org martin.lau@linux.dev mykolal@fb.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: externs should be avoided in .c files WARNING: line length of 82 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on x86_64 with llvm-16

Commit Message

Hou Tao Sept. 1, 2022, 6:19 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Under full preemptible kernel, task local storage lookup operations on
the same CPU may update per-cpu bpf_task_storage_busy concurrently. If
the update of bpf_task_storage_busy is not preemption safe, the final
value of bpf_task_storage_busy may become not-zero forever and
bpf_task_storage_trylock() will always fail. So add a test case to
ensure the update of bpf_task_storage_busy is preemption safe.

Will skip the test case when CONFIG_PREEMPT is disabled, and it can only
reproduce the problem probabilistically. By increasing
TASK_STORAGE_MAP_NR_LOOP and running it under ARM64 VM with 4-cpus, it
takes about four rounds to reproduce:

> test_maps is modified to only run test_task_storage_map_stress_lookup()
$ export TASK_STORAGE_MAP_NR_THREAD=256
$ export TASK_STORAGE_MAP_NR_LOOP=81920
$ export TASK_STORAGE_MAP_PIN_CPU=1
$ time ./test_maps
test_task_storage_map_stress_lookup(135):FAIL:bad bpf_task_storage_busy got -2

real    0m24.743s
user    0m6.772s
sys     0m17.966s

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 .../bpf/map_tests/task_storage_map.c          | 122 ++++++++++++++++++
 .../bpf/progs/read_bpf_task_storage_busy.c    |  39 ++++++
 2 files changed, 161 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/map_tests/task_storage_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c

Comments

Martin KaFai Lau Sept. 1, 2022, 7:37 p.m. UTC | #1
On Thu, Sep 01, 2022 at 02:19:38PM +0800, Hou Tao wrote:
> +void test_task_storage_map_stress_lookup(void)
> +{
> +#define MAX_NR_THREAD 4096
> +	unsigned int i, nr = 256, loop = 8192, cpu = 0;
> +	struct read_bpf_task_storage_busy *skel;
> +	pthread_t tids[MAX_NR_THREAD];
> +	struct lookup_ctx ctx;
> +	cpu_set_t old, new;
> +	const char *cfg;
> +	int err;
> +
> +	cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
> +	if (cfg) {
> +		nr = atoi(cfg);
> +		if (nr > MAX_NR_THREAD)
> +			nr = MAX_NR_THREAD;
> +	}
> +	cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
> +	if (cfg)
> +		loop = atoi(cfg);
> +	cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
> +	if (cfg)
> +		cpu = atoi(cfg);
> +
> +	skel = read_bpf_task_storage_busy__open_and_load();
> +	err = libbpf_get_error(skel);
> +	CHECK(err, "open_and_load", "error %d\n", err);
> +
> +	/* Only for a fully preemptible kernel */
> +	if (!skel->kconfig->CONFIG_PREEMPT)
> +		return;
> +
> +	/* Save the old affinity setting */
> +	sched_getaffinity(getpid(), sizeof(old), &old);
> +
> +	/* Pinned on a specific CPU */
> +	CPU_ZERO(&new);
> +	CPU_SET(cpu, &new);
> +	sched_setaffinity(getpid(), sizeof(new), &new);
> +
> +	ctx.start = false;
> +	ctx.stop = false;
> +	ctx.pid_fd = sys_pidfd_open(getpid(), 0);
> +	ctx.map_fd = bpf_map__fd(skel->maps.task);
> +	ctx.loop = loop;
> +	for (i = 0; i < nr; i++) {
> +		err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
> +		if (err) {
> +			abort_lookup(&ctx, tids, i);
> +			CHECK(err, "pthread_create", "error %d\n", err);
> +			goto out;
> +		}
> +	}
> +
> +	ctx.start = true;
> +	for (i = 0; i < nr; i++)
> +		pthread_join(tids[i], NULL);
> +
> +	skel->bss->pid = getpid();
> +	err = read_bpf_task_storage_busy__attach(skel);
> +	CHECK(err, "attach", "error %d\n", err);
> +
> +	/* Trigger program */
> +	syscall(SYS_gettid);
> +	skel->bss->pid = 0;
> +
> +	CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
Applied.  One nit.
Please follow up with a test PASS or SKIP printf.
There is a 'skips' counter in test_maps.c that
is good to bump also.
Hou Tao Sept. 2, 2022, 3:16 a.m. UTC | #2
Hi,

On 9/2/2022 3:37 AM, Martin KaFai Lau wrote:
> On Thu, Sep 01, 2022 at 02:19:38PM +0800, Hou Tao wrote:
>> +void test_task_storage_map_stress_lookup(void)
>> +{
SNIP
>> +	ctx.start = true;
>> +	for (i = 0; i < nr; i++)
>> +		pthread_join(tids[i], NULL);
>> +
>> +	skel->bss->pid = getpid();
>> +	err = read_bpf_task_storage_busy__attach(skel);
>> +	CHECK(err, "attach", "error %d\n", err);
>> +
>> +	/* Trigger program */
>> +	syscall(SYS_gettid);
>> +	skel->bss->pid = 0;
>> +
>> +	CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
> Applied.  One nit.
> Please follow up with a test PASS or SKIP printf.
> There is a 'skips' counter in test_maps.c that
> is good to bump also.
Will send a follow-up patch to do it. Thanks.
>
> .
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
new file mode 100644
index 000000000000..1adc9c292eb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -0,0 +1,122 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_maps.h"
+#include "task_local_storage_helpers.h"
+#include "read_bpf_task_storage_busy.skel.h"
+
+struct lookup_ctx {
+	bool start;
+	bool stop;
+	int pid_fd;
+	int map_fd;
+	int loop;
+};
+
+static void *lookup_fn(void *arg)
+{
+	struct lookup_ctx *ctx = arg;
+	long value;
+	int i = 0;
+
+	while (!ctx->start)
+		usleep(1);
+
+	while (!ctx->stop && i++ < ctx->loop)
+		bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
+	return NULL;
+}
+
+static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
+{
+	unsigned int i;
+
+	ctx->stop = true;
+	ctx->start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+}
+
+void test_task_storage_map_stress_lookup(void)
+{
+#define MAX_NR_THREAD 4096
+	unsigned int i, nr = 256, loop = 8192, cpu = 0;
+	struct read_bpf_task_storage_busy *skel;
+	pthread_t tids[MAX_NR_THREAD];
+	struct lookup_ctx ctx;
+	cpu_set_t old, new;
+	const char *cfg;
+	int err;
+
+	cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
+	if (cfg) {
+		nr = atoi(cfg);
+		if (nr > MAX_NR_THREAD)
+			nr = MAX_NR_THREAD;
+	}
+	cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
+	if (cfg)
+		loop = atoi(cfg);
+	cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
+	if (cfg)
+		cpu = atoi(cfg);
+
+	skel = read_bpf_task_storage_busy__open_and_load();
+	err = libbpf_get_error(skel);
+	CHECK(err, "open_and_load", "error %d\n", err);
+
+	/* Only for a fully preemptible kernel */
+	if (!skel->kconfig->CONFIG_PREEMPT)
+		return;
+
+	/* Save the old affinity setting */
+	sched_getaffinity(getpid(), sizeof(old), &old);
+
+	/* Pinned on a specific CPU */
+	CPU_ZERO(&new);
+	CPU_SET(cpu, &new);
+	sched_setaffinity(getpid(), sizeof(new), &new);
+
+	ctx.start = false;
+	ctx.stop = false;
+	ctx.pid_fd = sys_pidfd_open(getpid(), 0);
+	ctx.map_fd = bpf_map__fd(skel->maps.task);
+	ctx.loop = loop;
+	for (i = 0; i < nr; i++) {
+		err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
+		if (err) {
+			abort_lookup(&ctx, tids, i);
+			CHECK(err, "pthread_create", "error %d\n", err);
+			goto out;
+		}
+	}
+
+	ctx.start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+
+	skel->bss->pid = getpid();
+	err = read_bpf_task_storage_busy__attach(skel);
+	CHECK(err, "attach", "error %d\n", err);
+
+	/* Trigger program */
+	syscall(SYS_gettid);
+	skel->bss->pid = 0;
+
+	CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
+out:
+	read_bpf_task_storage_busy__destroy(skel);
+	/* Restore affinity setting */
+	sched_setaffinity(getpid(), sizeof(old), &old);
+}
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
new file mode 100644
index 000000000000..a47bb0120719
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -0,0 +1,39 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern bool CONFIG_PREEMPT __kconfig __weak;
+extern const int bpf_task_storage_busy __ksym;
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+int busy = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} task SEC(".maps");
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(read_bpf_task_storage_busy)
+{
+	int *value;
+	int key;
+
+	if (!CONFIG_PREEMPT)
+		return 0;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
+	if (value)
+		busy = *value;
+
+	return 0;
+}