diff mbox series

[v3,bpf-next,5/6] selftests/bpf: test map percpu stats

Message ID 20230630082516.16286-6-aspsk@isovalent.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: add percpu stats for bpf_map | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 3 maintainers not CCed: mykolal@fb.com shuah@kernel.org linux-kselftest@vger.kernel.org
netdev/build_clang fail Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Comparison to NULL could be written "!skel" CHECK: spaces preferred around that '*' (ctx:VxV) CHECK: spaces preferred around that '-' (ctx:VxV) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 102 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on s390x with gcc

Commit Message

Anton Protopopov June 30, 2023, 8:25 a.m. UTC
Add a new map test, map_percpu_stats.c, which checks the correctness of the
map's percpu element counters.  For supported maps the test upserts a number
of elements, checks the correctness of the counters, then deletes all the
elements and checks again that the sum of the counters drops back to zero.

The following map types are tested:

    * BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC
    * BPF_MAP_TYPE_PERCPU_HASH, BPF_F_NO_PREALLOC
    * BPF_MAP_TYPE_HASH,
    * BPF_MAP_TYPE_PERCPU_HASH,
    * BPF_MAP_TYPE_LRU_HASH
    * BPF_MAP_TYPE_LRU_PERCPU_HASH

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 .../bpf/map_tests/map_percpu_stats.c          | 336 ++++++++++++++++++
 .../selftests/bpf/progs/map_percpu_stats.c    |  24 ++
 2 files changed, 360 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
 create mode 100644 tools/testing/selftests/bpf/progs/map_percpu_stats.c

Comments

Hou Tao July 4, 2023, 2:41 p.m. UTC | #1
Hi,

On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> Add a new map test, map_percpu_stats.c, which checks the correctness of the
> map's percpu element counters.  For supported maps the test upserts a number
> of elements, checks the correctness of the counters, then deletes all the
> elements and checks again that the sum of the counters drops back to zero.
>
> The following map types are tested:
>
>     * BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC
>     * BPF_MAP_TYPE_PERCPU_HASH, BPF_F_NO_PREALLOC
>     * BPF_MAP_TYPE_HASH,
>     * BPF_MAP_TYPE_PERCPU_HASH,
>     * BPF_MAP_TYPE_LRU_HASH
>     * BPF_MAP_TYPE_LRU_PERCPU_HASH

A test for BPF_MAP_TYPE_HASH_OF_MAPS is also needed.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  .../bpf/map_tests/map_percpu_stats.c          | 336 ++++++++++++++++++
>  .../selftests/bpf/progs/map_percpu_stats.c    |  24 ++
>  2 files changed, 360 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
>  create mode 100644 tools/testing/selftests/bpf/progs/map_percpu_stats.c
>
> diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> new file mode 100644
> index 000000000000..5b45af230368
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> @@ -0,0 +1,336 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2023 Isovalent */
> +
> +#include <errno.h>
> +#include <unistd.h>
> +#include <pthread.h>
> +
> +#include <bpf/bpf.h>
> +#include <bpf/libbpf.h>
> +
> +#include <bpf_util.h>
> +#include <test_maps.h>
> +
> +#include "map_percpu_stats.skel.h"
> +
> +#define MAX_ENTRIES 16384
> +#define N_THREADS 37

Why are 37 threads needed here? Would a smaller number of threads work as well?
> +
> +#define MAX_MAP_KEY_SIZE 4
> +
> +static void map_info(int map_fd, struct bpf_map_info *info)
> +{
> +	__u32 len = sizeof(*info);
> +	int ret;
> +
> +	memset(info, 0, sizeof(*info));
> +
> +	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
> +	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
Please use ASSERT_OK instead.
> +}
> +
> +static const char *map_type_to_s(__u32 type)
> +{
> +	switch (type) {
> +	case BPF_MAP_TYPE_HASH:
> +		return "HASH";
> +	case BPF_MAP_TYPE_PERCPU_HASH:
> +		return "PERCPU_HASH";
> +	case BPF_MAP_TYPE_LRU_HASH:
> +		return "LRU_HASH";
> +	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
> +		return "LRU_PERCPU_HASH";
> +	default:
> +		return "<define-me>";
> +	}
> +}
> +
> +/* Map i -> map-type-specific-key */
> +static void *map_key(__u32 type, __u32 i)
> +{
> +	static __thread __u8 key[MAX_MAP_KEY_SIZE];

Why is a per-thread key necessary here? Could we just define it where
the key is needed?
> +
> +	*(__u32 *)key = i;
> +	return key;
> +}
> +
> +static __u32 map_count_elements(__u32 type, int map_fd)
> +{
> +	void *key = map_key(type, -1);
> +	int n = 0;
> +
> +	while (!bpf_map_get_next_key(map_fd, key, key))
> +		n++;
> +	return n;
> +}
> +
> +static void delete_all_elements(__u32 type, int map_fd)
> +{
> +	void *key = map_key(type, -1);
> +	void *keys;
> +	int n = 0;
> +	int ret;
> +
> +	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
> +	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
> +
> +	for (; !bpf_map_get_next_key(map_fd, key, key); n++)
> +		memcpy(keys + n*MAX_MAP_KEY_SIZE, key, MAX_MAP_KEY_SIZE);
> +
> +	while (--n >= 0) {
> +		ret = bpf_map_delete_elem(map_fd, keys + n*MAX_MAP_KEY_SIZE);
> +		CHECK(ret < 0, "bpf_map_delete_elem", "error: %s\n", strerror(errno));
> +	}
> +}

Please use ASSERT_xxx() to replace CHECK().
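
A minimal sketch of the suggested conversion (ASSERT_OK() and friends come
from test_progs.h; whether they are usable from test_maps is discussed later
in this thread):

	ret = bpf_map_delete_elem(map_fd, keys + n*MAX_MAP_KEY_SIZE);
	/* ASSERT_OK() fails the test unless ret == 0 */
	ASSERT_OK(ret, "bpf_map_delete_elem");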
> +
> +static bool is_lru(__u32 map_type)
> +{
> +	return map_type == BPF_MAP_TYPE_LRU_HASH ||
> +	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
> +}
> +
> +struct upsert_opts {
> +	__u32 map_type;
> +	int map_fd;
> +	__u32 n;
> +};
> +
> +static void *patch_map_thread(void *arg)
> +{
> +	struct upsert_opts *opts = arg;
> +	void *key;
> +	int val;
> +	int ret;
> +	int i;
> +
> +	for (i = 0; i < opts->n; i++) {
> +		key = map_key(opts->map_type, i);
> +		val = rand();
> +		ret = bpf_map_update_elem(opts->map_fd, key, &val, 0);
> +		CHECK(ret < 0, "bpf_map_update_elem", "error: %s\n", strerror(errno));
> +	}
> +	return NULL;
> +}
> +
> +static void upsert_elements(struct upsert_opts *opts)
> +{
> +	pthread_t threads[N_THREADS];
> +	int ret;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(threads); i++) {
> +		ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
> +		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(threads); i++) {
> +		ret = pthread_join(i[threads], NULL);
> +		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
> +	}
> +}
> +
> +static __u32 read_cur_elements(int iter_fd)
> +{
> +	char buf[64];
> +	ssize_t n;
> +	__u32 ret;
> +
> +	n = read(iter_fd, buf, sizeof(buf)-1);
> +	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
> +	buf[n] = '\0';
> +
> +	errno = 0;
> +	ret = (__u32)strtol(buf, NULL, 10);
> +	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
> +
> +	return ret;
> +}
> +
> +static __u32 get_cur_elements(int map_id)
> +{
> +	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> +	union bpf_iter_link_info linfo;
> +	struct map_percpu_stats *skel;
> +	struct bpf_link *link;
> +	int iter_fd;
> +	int ret;
> +
> +	opts.link_info = &linfo;
> +	opts.link_info_len = sizeof(linfo);
> +
> +	skel = map_percpu_stats__open();
> +	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
> +
> +	skel->bss->target_id = map_id;
> +
> +	ret = map_percpu_stats__load(skel);
> +	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
> +
> +	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, &opts);
> +	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
> +
> +	iter_fd = bpf_iter_create(bpf_link__fd(link));
> +	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
> +
> +	return read_cur_elements(iter_fd);

Need to call close(iter_fd), bpf_link__destroy(link) and
map_percpu_stats__destroy() before returning, otherwise there will be a
resource leak.
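
A possible sketch of the fixed tail of get_cur_elements(), reusing the names
from the patch:

	__u32 n;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n = read_cur_elements(iter_fd);

	/* release the resources acquired above */
	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n;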
> +}
> +
> +static void __test(int map_fd)
> +{
> +	__u32 n = MAX_ENTRIES - 1000;
> +	__u32 real_current_elements;
> +	__u32 iter_current_elements;
> +	struct upsert_opts opts = {
> +		.map_fd = map_fd,
> +		.n = n,
> +	};
> +	struct bpf_map_info info;
> +
> +	map_info(map_fd, &info);
> +	opts.map_type = info.type;
> +
> +	/*
> +	 * Upsert keys [0, n) under some competition: with random values from
> +	 * N_THREADS threads
> +	 */
> +	upsert_elements(&opts);
> +
> +	/*
> +	 * The sum of percpu elements counters for all hashtable-based maps
> +	 * should be equal to the number of elements present in the map. For
> +	 * non-lru maps this number should be the number n of upserted
> +	 * elements. For lru maps some elements might have been evicted. Check
> +	 * that all numbers make sense
> +	 */
> +	map_info(map_fd, &info);

I think there is no need to call map_info() multiple times because the
needed type and id will not change after creation.
> +	real_current_elements = map_count_elements(info.type, map_fd);
> +	if (!is_lru(info.type))
> +		CHECK(n != real_current_elements, "map_count_elements",
> +		      "real_current_elements(%u) != expected(%u)\n", real_current_elements, n);
For the non-LRU case, please use ASSERT_EQ(). For the LRU map, should we
check "n >= real_current_elements" instead?
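
A sketch of what the suggested checks could look like (a hypothetical
rewrite of the snippet above):

	if (!is_lru(info.type))
		/* all n upserted elements must still be present */
		ASSERT_EQ(real_current_elements, n, "map_count_elements");
	else
		/* an LRU map may have evicted some of the elements */
		ASSERT_GE(n, real_current_elements, "map_count_elements");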
> +
> +	iter_current_elements = get_cur_elements(info.id);
> +	CHECK(iter_current_elements != real_current_elements, "get_cur_elements",
> +	      "iter_current_elements=%u, expected %u (map_type=%s,map_flags=%08x)\n",
> +	      iter_current_elements, real_current_elements, map_type_to_s(info.type), info.map_flags);
> +
> +	/*
> +	 * Cleanup the map and check that all elements are actually gone and
> +	 * that the sum of percpu elements counters is back to 0 as well
> +	 */
> +	delete_all_elements(info.type, map_fd);
> +	map_info(map_fd, &info);
> +	real_current_elements = map_count_elements(info.type, map_fd);
> +	CHECK(real_current_elements, "map_count_elements",
> +	      "expected real_current_elements=0, got %u", real_current_elements);

ASSERT_EQ
> +
> +	iter_current_elements = get_cur_elements(info.id);
> +	CHECK(iter_current_elements != 0, "get_cur_elements",
> +	      "iter_current_elements=%u, expected 0 (map_type=%s,map_flags=%08x)\n",
> +	      iter_current_elements, map_type_to_s(info.type), info.map_flags);
> +
ASSERT_NEQ
> +	close(map_fd);
> +}
> +
> +static int map_create_opts(__u32 type, const char *name,
> +			   struct bpf_map_create_opts *map_opts,
> +			   __u32 key_size, __u32 val_size)
> +{
> +	int map_fd;
> +
> +	map_fd = bpf_map_create(type, name, key_size, val_size, MAX_ENTRIES, map_opts);
> +	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
> +			strerror(errno), name);

Please use ASSERT_GE instead.
> +
> +	return map_fd;
> +}
> +
> +static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
> +{
> +	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
> +}
> +
> +static int create_hash(void)
> +{
> +	struct bpf_map_create_opts map_opts = {
> +		.sz = sizeof(map_opts),
> +		.map_flags = BPF_F_NO_PREALLOC,
> +	};
> +
> +	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
> +}
> +
> +static int create_percpu_hash(void)
> +{
> +	struct bpf_map_create_opts map_opts = {
> +		.sz = sizeof(map_opts),
> +		.map_flags = BPF_F_NO_PREALLOC,
> +	};
> +
> +	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
> +}
> +
> +static int create_hash_prealloc(void)
> +{
> +	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
> +}
> +
> +static int create_percpu_hash_prealloc(void)
> +{
> +	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
> +}
> +
> +static int create_lru_hash(void)
> +{
> +	return map_create(BPF_MAP_TYPE_LRU_HASH, "lru_hash", NULL);
> +}
> +
> +static int create_percpu_lru_hash(void)
> +{
> +	return map_create(BPF_MAP_TYPE_LRU_PERCPU_HASH, "lru_hash_percpu", NULL);
> +}
> +
> +static void map_percpu_stats_hash(void)
> +{
> +	__test(create_hash());
> +	printf("test_%s:PASS\n", __func__);
> +}
> +
> +static void map_percpu_stats_percpu_hash(void)
> +{
> +	__test(create_percpu_hash());
> +	printf("test_%s:PASS\n", __func__);
> +}
> +
> +static void map_percpu_stats_hash_prealloc(void)
> +{
> +	__test(create_hash_prealloc());
> +	printf("test_%s:PASS\n", __func__);
> +}
> +
> +static void map_percpu_stats_percpu_hash_prealloc(void)
> +{
> +	__test(create_percpu_hash_prealloc());
> +	printf("test_%s:PASS\n", __func__);
> +}
> +
> +static void map_percpu_stats_lru_hash(void)
> +{
> +	__test(create_lru_hash());
> +	printf("test_%s:PASS\n", __func__);
> +}
> +
> +static void map_percpu_stats_percpu_lru_hash(void)
> +{
> +	__test(create_percpu_lru_hash());
> +	printf("test_%s:PASS\n", __func__);

After switching to subtests, the printf() can be removed.
> +}
> +
> +void test_map_percpu_stats(void)
> +{
> +	map_percpu_stats_hash();
> +	map_percpu_stats_percpu_hash();
> +	map_percpu_stats_hash_prealloc();
> +	map_percpu_stats_percpu_hash_prealloc();
> +	map_percpu_stats_lru_hash();
> +	map_percpu_stats_percpu_lru_hash();
> +}

Please use test__start_subtest() to create multiple subtests.
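
A minimal sketch of what the dispatch could look like with
test__start_subtest() (this helper only exists in test_progs, as it turns
out later in this thread):

	void test_map_percpu_stats(void)
	{
		if (test__start_subtest("hash"))
			__test(create_hash());
		if (test__start_subtest("percpu_hash"))
			__test(create_percpu_hash());
		/* ... and so on for the remaining map types */
	}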
> diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
> new file mode 100644
> index 000000000000..10b2325c1720
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2023 Isovalent */
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +__u32 target_id;
> +
> +__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
> +
> +SEC("iter/bpf_map")
> +int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
> +{
> +	struct seq_file *seq = ctx->meta->seq;
> +	struct bpf_map *map = ctx->map;
> +
> +	if (map && map->id == target_id)
> +		BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
Anton Protopopov July 4, 2023, 3:02 p.m. UTC | #2
On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
> Hi,
> 
> On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> > Add a new map test, map_percpu_stats.c, which checks the correctness of the
> > map's percpu element counters.  For supported maps the test upserts a number
> > of elements, checks the correctness of the counters, then deletes all the
> > elements and checks again that the sum of the counters drops back to zero.
> >
> > The following map types are tested:
> >
> >     * BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC
> >     * BPF_MAP_TYPE_PERCPU_HASH, BPF_F_NO_PREALLOC
> >     * BPF_MAP_TYPE_HASH,
> >     * BPF_MAP_TYPE_PERCPU_HASH,
> >     * BPF_MAP_TYPE_LRU_HASH
> >     * BPF_MAP_TYPE_LRU_PERCPU_HASH
> 
> A test for BPF_MAP_TYPE_HASH_OF_MAPS is also needed.

I will add it.

> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  .../bpf/map_tests/map_percpu_stats.c          | 336 ++++++++++++++++++
> >  .../selftests/bpf/progs/map_percpu_stats.c    |  24 ++
> >  2 files changed, 360 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/map_percpu_stats.c
> >
> > diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> > new file mode 100644
> > index 000000000000..5b45af230368
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> > @@ -0,0 +1,336 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2023 Isovalent */
> > +
> > +#include <errno.h>
> > +#include <unistd.h>
> > +#include <pthread.h>
> > +
> > +#include <bpf/bpf.h>
> > +#include <bpf/libbpf.h>
> > +
> > +#include <bpf_util.h>
> > +#include <test_maps.h>
> > +
> > +#include "map_percpu_stats.skel.h"
> > +
> > +#define MAX_ENTRIES 16384
> > +#define N_THREADS 37
> 
> Why are 37 threads needed here? Would a smaller number of threads work as well?

This was used to evict more elements from LRU maps when they are full.

> > +
> > +#define MAX_MAP_KEY_SIZE 4
> > +
> > +static void map_info(int map_fd, struct bpf_map_info *info)
> > +{
> > +	__u32 len = sizeof(*info);
> > +	int ret;
> > +
> > +	memset(info, 0, sizeof(*info));
> > +
> > +	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
> > +	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
> Please use ASSERT_OK instead.

Ok, thanks, will do (and for all other similar cases you've mentioned below).

> > +}
> > +
> > +static const char *map_type_to_s(__u32 type)
> > +{
> > +	switch (type) {
> > +	case BPF_MAP_TYPE_HASH:
> > +		return "HASH";
> > +	case BPF_MAP_TYPE_PERCPU_HASH:
> > +		return "PERCPU_HASH";
> > +	case BPF_MAP_TYPE_LRU_HASH:
> > +		return "LRU_HASH";
> > +	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
> > +		return "LRU_PERCPU_HASH";
> > +	default:
> > +		return "<define-me>";
> > +	}
> > +}
> > +
> > +/* Map i -> map-type-specific-key */
> > +static void *map_key(__u32 type, __u32 i)
> > +{
> > +	static __thread __u8 key[MAX_MAP_KEY_SIZE];
> 
> Why is a per-thread key necessary here? Could we just define it where
> the key is needed?

Thanks, I will remove it to simplify the code.  (This was used to create keys
for non-hash maps, e.g., the LPM-trie, in the first version of the patch.)
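
A hypothetical sketch of the simplification, building the key on the
caller's stack instead of in thread-local storage:

	for (i = 0; i < opts->n; i++) {
		__u32 key = i;	/* a plain local instead of map_key() */
		int val = rand();

		ret = bpf_map_update_elem(opts->map_fd, &key, &val, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "error: %s\n", strerror(errno));
	}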

> > +
> > +	*(__u32 *)key = i;
> > +	return key;
> > +}
> > +
> > +static __u32 map_count_elements(__u32 type, int map_fd)
> > +{
> > +	void *key = map_key(type, -1);
> > +	int n = 0;
> > +
> > +	while (!bpf_map_get_next_key(map_fd, key, key))
> > +		n++;
> > +	return n;
> > +}
> > +
> > +static void delete_all_elements(__u32 type, int map_fd)
> > +{
> > +	void *key = map_key(type, -1);
> > +	void *keys;
> > +	int n = 0;
> > +	int ret;
> > +
> > +	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
> > +	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
> > +
> > +	for (; !bpf_map_get_next_key(map_fd, key, key); n++)
> > +		memcpy(keys + n*MAX_MAP_KEY_SIZE, key, MAX_MAP_KEY_SIZE);
> > +
> > +	while (--n >= 0) {
> > +		ret = bpf_map_delete_elem(map_fd, keys + n*MAX_MAP_KEY_SIZE);
> > +		CHECK(ret < 0, "bpf_map_delete_elem", "error: %s\n", strerror(errno));
> > +	}
> > +}
> 
> Please use ASSERT_xxx() to replace CHECK().
> > +
> > +static bool is_lru(__u32 map_type)
> > +{
> > +	return map_type == BPF_MAP_TYPE_LRU_HASH ||
> > +	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
> > +}
> > +
> > +struct upsert_opts {
> > +	__u32 map_type;
> > +	int map_fd;
> > +	__u32 n;
> > +};
> > +
> > +static void *patch_map_thread(void *arg)
> > +{
> > +	struct upsert_opts *opts = arg;
> > +	void *key;
> > +	int val;
> > +	int ret;
> > +	int i;
> > +
> > +	for (i = 0; i < opts->n; i++) {
> > +		key = map_key(opts->map_type, i);
> > +		val = rand();
> > +		ret = bpf_map_update_elem(opts->map_fd, key, &val, 0);
> > +		CHECK(ret < 0, "bpf_map_update_elem", "error: %s\n", strerror(errno));
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static void upsert_elements(struct upsert_opts *opts)
> > +{
> > +	pthread_t threads[N_THREADS];
> > +	int ret;
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(threads); i++) {
> > +		ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
> > +		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
> > +	}
> > +
> > +	for (i = 0; i < ARRAY_SIZE(threads); i++) {
> > +		ret = pthread_join(i[threads], NULL);
> > +		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
> > +	}
> > +}
> > +
> > +static __u32 read_cur_elements(int iter_fd)
> > +{
> > +	char buf[64];
> > +	ssize_t n;
> > +	__u32 ret;
> > +
> > +	n = read(iter_fd, buf, sizeof(buf)-1);
> > +	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
> > +	buf[n] = '\0';
> > +
> > +	errno = 0;
> > +	ret = (__u32)strtol(buf, NULL, 10);
> > +	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
> > +
> > +	return ret;
> > +}
> > +
> > +static __u32 get_cur_elements(int map_id)
> > +{
> > +	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> > +	union bpf_iter_link_info linfo;
> > +	struct map_percpu_stats *skel;
> > +	struct bpf_link *link;
> > +	int iter_fd;
> > +	int ret;
> > +
> > +	opts.link_info = &linfo;
> > +	opts.link_info_len = sizeof(linfo);
> > +
> > +	skel = map_percpu_stats__open();
> > +	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
> > +
> > +	skel->bss->target_id = map_id;
> > +
> > +	ret = map_percpu_stats__load(skel);
> > +	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
> > +
> > +	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, &opts);
> > +	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
> > +
> > +	iter_fd = bpf_iter_create(bpf_link__fd(link));
> > +	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
> > +
> > +	return read_cur_elements(iter_fd);
> 
> Need to call close(iter_fd), bpf_link__destroy(link) and
> map_percpu_stats__destroy() before returning, otherwise there will be a
> resource leak.

Yes, thanks.

> > +}
> > +
> > +static void __test(int map_fd)
> > +{
> > +	__u32 n = MAX_ENTRIES - 1000;
> > +	__u32 real_current_elements;
> > +	__u32 iter_current_elements;
> > +	struct upsert_opts opts = {
> > +		.map_fd = map_fd,
> > +		.n = n,
> > +	};
> > +	struct bpf_map_info info;
> > +
> > +	map_info(map_fd, &info);
> > +	opts.map_type = info.type;
> > +
> > +	/*
> > +	 * Upsert keys [0, n) under some competition: with random values from
> > +	 * N_THREADS threads
> > +	 */
> > +	upsert_elements(&opts);
> > +
> > +	/*
> > +	 * The sum of percpu elements counters for all hashtable-based maps
> > +	 * should be equal to the number of elements present in the map. For
> > +	 * non-lru maps this number should be the number n of upserted
> > +	 * elements. For lru maps some elements might have been evicted. Check
> > +	 * that all numbers make sense
> > +	 */
> > +	map_info(map_fd, &info);
> 
> I think there is no need to call map_info() multiple times because the
> needed type and id will not change after creation.

Thanks, will fix. (This code is left over from the first version, where the
counter was returned by map_info().)

> > +	real_current_elements = map_count_elements(info.type, map_fd);
> > +	if (!is_lru(info.type))
> > +		CHECK(n != real_current_elements, "map_count_elements",
> > +		      "real_current_elements(%u) != expected(%u)\n", real_current_elements, n);
> For the non-LRU case, please use ASSERT_EQ(). For the LRU map, should we
> check "n >= real_current_elements" instead?
> > +
> > +	iter_current_elements = get_cur_elements(info.id);
> > +	CHECK(iter_current_elements != real_current_elements, "get_cur_elements",
> > +	      "iter_current_elements=%u, expected %u (map_type=%s,map_flags=%08x)\n",
> > +	      iter_current_elements, real_current_elements, map_type_to_s(info.type), info.map_flags);
> > +
> > +	/*
> > +	 * Cleanup the map and check that all elements are actually gone and
> > +	 * that the sum of percpu elements counters is back to 0 as well
> > +	 */
> > +	delete_all_elements(info.type, map_fd);
> > +	map_info(map_fd, &info);
> > +	real_current_elements = map_count_elements(info.type, map_fd);
> > +	CHECK(real_current_elements, "map_count_elements",
> > +	      "expected real_current_elements=0, got %u", real_current_elements);
> 
> ASSERT_EQ
> > +
> > +	iter_current_elements = get_cur_elements(info.id);
> > +	CHECK(iter_current_elements != 0, "get_cur_elements",
> > +	      "iter_current_elements=%u, expected 0 (map_type=%s,map_flags=%08x)\n",
> > +	      iter_current_elements, map_type_to_s(info.type), info.map_flags);
> > +
> ASSERT_NEQ
> > +	close(map_fd);
> > +}
> > +
> > +static int map_create_opts(__u32 type, const char *name,
> > +			   struct bpf_map_create_opts *map_opts,
> > +			   __u32 key_size, __u32 val_size)
> > +{
> > +	int map_fd;
> > +
> > +	map_fd = bpf_map_create(type, name, key_size, val_size, MAX_ENTRIES, map_opts);
> > +	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
> > +			strerror(errno), name);
> 
> Please use ASSERT_GE instead.
> > +
> > +	return map_fd;
> > +}
> > +
> > +static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
> > +{
> > +	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
> > +}
> > +
> > +static int create_hash(void)
> > +{
> > +	struct bpf_map_create_opts map_opts = {
> > +		.sz = sizeof(map_opts),
> > +		.map_flags = BPF_F_NO_PREALLOC,
> > +	};
> > +
> > +	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
> > +}
> > +
> > +static int create_percpu_hash(void)
> > +{
> > +	struct bpf_map_create_opts map_opts = {
> > +		.sz = sizeof(map_opts),
> > +		.map_flags = BPF_F_NO_PREALLOC,
> > +	};
> > +
> > +	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
> > +}
> > +
> > +static int create_hash_prealloc(void)
> > +{
> > +	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
> > +}
> > +
> > +static int create_percpu_hash_prealloc(void)
> > +{
> > +	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
> > +}
> > +
> > +static int create_lru_hash(void)
> > +{
> > +	return map_create(BPF_MAP_TYPE_LRU_HASH, "lru_hash", NULL);
> > +}
> > +
> > +static int create_percpu_lru_hash(void)
> > +{
> > +	return map_create(BPF_MAP_TYPE_LRU_PERCPU_HASH, "lru_hash_percpu", NULL);
> > +}
> > +
> > +static void map_percpu_stats_hash(void)
> > +{
> > +	__test(create_hash());
> > +	printf("test_%s:PASS\n", __func__);
> > +}
> > +
> > +static void map_percpu_stats_percpu_hash(void)
> > +{
> > +	__test(create_percpu_hash());
> > +	printf("test_%s:PASS\n", __func__);
> > +}
> > +
> > +static void map_percpu_stats_hash_prealloc(void)
> > +{
> > +	__test(create_hash_prealloc());
> > +	printf("test_%s:PASS\n", __func__);
> > +}
> > +
> > +static void map_percpu_stats_percpu_hash_prealloc(void)
> > +{
> > +	__test(create_percpu_hash_prealloc());
> > +	printf("test_%s:PASS\n", __func__);
> > +}
> > +
> > +static void map_percpu_stats_lru_hash(void)
> > +{
> > +	__test(create_lru_hash());
> > +	printf("test_%s:PASS\n", __func__);
> > +}
> > +
> > +static void map_percpu_stats_percpu_lru_hash(void)
> > +{
> > +	__test(create_percpu_lru_hash());
> > +	printf("test_%s:PASS\n", __func__);
> 
> After switching to subtests, the printf() can be removed.
> > +}
> > +
> > +void test_map_percpu_stats(void)
> > +{
> > +	map_percpu_stats_hash();
> > +	map_percpu_stats_percpu_hash();
> > +	map_percpu_stats_hash_prealloc();
> > +	map_percpu_stats_percpu_hash_prealloc();
> > +	map_percpu_stats_lru_hash();
> > +	map_percpu_stats_percpu_lru_hash();
> > +}
> 
> Please use test__start_subtest() to create multiple subtests.

Thanks.

I will update this selftest in v4 with your comments addressed + batch ops
tests.
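
For reference, a hypothetical sketch of what a batch-ops check could look
like (bpf_map_update_batch() is the libbpf batch API; the keys and values
arrays here are assumptions):

	LIBBPF_OPTS(bpf_map_batch_opts, batch_opts);
	__u32 count = n;

	/* upsert n elements in a single syscall */
	ret = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts);
	CHECK(ret < 0, "bpf_map_update_batch", "error: %s\n", strerror(errno));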

> > diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
> > new file mode 100644
> > index 000000000000..10b2325c1720
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
> > @@ -0,0 +1,24 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2023 Isovalent */
> > +
> > +#include "vmlinux.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_tracing.h>
> > +
> > +__u32 target_id;
> > +
> > +__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
> > +
> > +SEC("iter/bpf_map")
> > +int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
> > +{
> > +	struct seq_file *seq = ctx->meta->seq;
> > +	struct bpf_map *map = ctx->map;
> > +
> > +	if (map && map->id == target_id)
> > +		BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
> > +
> > +	return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
>
Anton Protopopov July 4, 2023, 3:23 p.m. UTC | #3
On Tue, Jul 04, 2023 at 03:02:32PM +0000, Anton Protopopov wrote:
> On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
> > Hi,
> > 
> > On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> > [...]
> > > +}
> > > +
> > > +void test_map_percpu_stats(void)
> > > +{
> > > +	map_percpu_stats_hash();
> > > +	map_percpu_stats_percpu_hash();
> > > +	map_percpu_stats_hash_prealloc();
> > > +	map_percpu_stats_percpu_hash_prealloc();
> > > +	map_percpu_stats_lru_hash();
> > > +	map_percpu_stats_percpu_lru_hash();
> > > +}
> > 
> > Please use test__start_subtest() to create multiple subtests.

After looking at the code, I think I will leave the individual functions here,
as test__start_subtest() is only implemented in test_progs (not test_maps),
and adding it here looks out of scope for this patch.
Anton Protopopov July 4, 2023, 3:49 p.m. UTC | #4
On Tue, Jul 04, 2023 at 03:23:30PM +0000, Anton Protopopov wrote:
> On Tue, Jul 04, 2023 at 03:02:32PM +0000, Anton Protopopov wrote:
> > On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
> > > Hi,
> > > 
> > > On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> > > [...]
> > > > +}
> > > > +
> > > > +void test_map_percpu_stats(void)
> > > > +{
> > > > +	map_percpu_stats_hash();
> > > > +	map_percpu_stats_percpu_hash();
> > > > +	map_percpu_stats_hash_prealloc();
> > > > +	map_percpu_stats_percpu_hash_prealloc();
> > > > +	map_percpu_stats_lru_hash();
> > > > +	map_percpu_stats_percpu_lru_hash();
> > > > +}
> > > 
> > > Please use test__start_subtest() to create multiple subtests.
> 
> After looking at the code, I think I will leave the individual functions here,
> as test__start_subtest() is only implemented in test_progs (not test_maps),
> and adding it here looks out of scope for this patch.

Ah, sorry, it looks like the same holds for the ASSERT* macros as well, as
they are only used in test_progs. (I will still fix the checks where you
commented on specific values, like n <= cur_elems for LRUs.)
Hou Tao July 5, 2023, 12:46 a.m. UTC | #5
Hi,

On 7/4/2023 11:23 PM, Anton Protopopov wrote:
> On Tue, Jul 04, 2023 at 03:02:32PM +0000, Anton Protopopov wrote:
>> On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
>>> Hi,
>>>
>>> On 6/30/2023 4:25 PM, Anton Protopopov wrote:
>>> [...]
>>>> +}
>>>> +
>>>> +void test_map_percpu_stats(void)
>>>> +{
>>>> +	map_percpu_stats_hash();
>>>> +	map_percpu_stats_percpu_hash();
>>>> +	map_percpu_stats_hash_prealloc();
>>>> +	map_percpu_stats_percpu_hash_prealloc();
>>>> +	map_percpu_stats_lru_hash();
>>>> +	map_percpu_stats_percpu_lru_hash();
>>>> +}
>>> Please use test__start_subtest() to create multiple subtests.
> After looking at the code, I think I will leave the individual functions here,
> as test__start_subtest() is only implemented in test_progs (not test_maps),
> and adding it here looks out of scope for this patch.
> .
I see. But can we just add these tests to test_progs instead, which is
more flexible?
Hou Tao July 5, 2023, 3:03 a.m. UTC | #6
Hi,

On 7/4/2023 11:02 PM, Anton Protopopov wrote:
> On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
>> Hi,
>>
>> On 6/30/2023 4:25 PM, Anton Protopopov wrote:
>>> Add a new map test, map_percpu_stats.c, which checks the correctness of the
>>> map's percpu element counters.  For supported maps the test upserts a number
>>> of elements, checks the correctness of the counters, then deletes all the
>>> elements and checks again that the sum of the counters drops back to zero.
>>>
>>> The following map types are tested:
>>>
>>>     * BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC
>>>     * BPF_MAP_TYPE_PERCPU_HASH, BPF_F_NO_PREALLOC
>>>     * BPF_MAP_TYPE_HASH,
>>>     * BPF_MAP_TYPE_PERCPU_HASH,
>>>     * BPF_MAP_TYPE_LRU_HASH
>>>     * BPF_MAP_TYPE_LRU_PERCPU_HASH
>> A test for BPF_MAP_TYPE_HASH_OF_MAPS is also needed.
We could also exercise the test for the LRU map with BPF_F_NO_COMMON_LRU.
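
A sketch of a possible creator for that case (hypothetical helper, following
the pattern of the other create_*() helpers in the patch):

	static int create_lru_hash_no_common(void)
	{
		struct bpf_map_create_opts map_opts = {
			.sz = sizeof(map_opts),
			/* per-CPU LRU lists instead of the common one */
			.map_flags = BPF_F_NO_COMMON_LRU,
		};

		return map_create(BPF_MAP_TYPE_LRU_HASH, "lru_hash_no_common", &map_opts);
	}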
>
SNIP
>>> diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
>>> new file mode 100644
>>> index 000000000000..5b45af230368
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
>>> @@ -0,0 +1,336 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/* Copyright (c) 2023 Isovalent */
>>> +
>>> +#include <errno.h>
>>> +#include <unistd.h>
>>> +#include <pthread.h>
>>> +
>>> +#include <bpf/bpf.h>
>>> +#include <bpf/libbpf.h>
>>> +
>>> +#include <bpf_util.h>
>>> +#include <test_maps.h>
>>> +
>>> +#include "map_percpu_stats.skel.h"
>>> +
>>> +#define MAX_ENTRIES 16384
>>> +#define N_THREADS 37
>> Why are 37 threads needed here? Would a smaller number of threads work as well?
> This was used to evict more elements from LRU maps when they are full.

I see. But in my understanding, for the global LRU list, eviction (the
invocation of htab_lru_map_delete_node) becomes possible only when the
number of free elements is less than LOCAL_FREE_TARGET(128) * nr_running_cpus.
Now the number of free elements is 1000 as defined in __test(), and the
number of vCPUs is 8 in my local VM setup (BPF CI also uses 8 vCPUs), so it
is hard to trigger the eviction because 8 * 128 is roughly equal to 1000.
So I suggest decreasing the number of free elements to 512 and the number
of threads to 8, or adjusting the number of running threads and free
elements according to the number of online CPUs.
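
A hypothetical sketch of the CPU-aware sizing (get_nprocs() comes from
<sys/sysinfo.h>; the constant is assumed to mirror the kernel's
LOCAL_FREE_TARGET):

	#include <sys/sysinfo.h>

	#define LOCAL_FREE_TARGET 128

	static __u32 n_keys_to_upsert(void)
	{
		/* leave fewer free slots than the LRU free target so that
		 * eviction is actually triggered on this machine
		 */
		return MAX_ENTRIES - LOCAL_FREE_TARGET * get_nprocs() / 2;
	}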
Anton Protopopov July 5, 2023, 3:34 p.m. UTC | #7
On Wed, Jul 05, 2023 at 11:03:25AM +0800, Hou Tao wrote:
> Hi,
> 
> On 7/4/2023 11:02 PM, Anton Protopopov wrote:
> > On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
> >> Hi,
> >>
> >> On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> >>> Add a new map test, map_percpu_stats.c, which checks the correctness of the
> >>> map's percpu element counters.  For supported maps the test upserts a number
> >>> of elements, checks the correctness of the counters, then deletes all the
> >>> elements and checks again that the sum of the counters drops back to zero.
> >>>
> >>> The following map types are tested:
> >>>
> >>>     * BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC
> >>>     * BPF_MAP_TYPE_PERCPU_HASH, BPF_F_NO_PREALLOC
> >>>     * BPF_MAP_TYPE_HASH,
> >>>     * BPF_MAP_TYPE_PERCPU_HASH,
> >>>     * BPF_MAP_TYPE_LRU_HASH
> >>>     * BPF_MAP_TYPE_LRU_PERCPU_HASH
> >> A test for BPF_MAP_TYPE_HASH_OF_MAPS is also needed.
> We could also exercise the test for the LRU map with BPF_F_NO_COMMON_LRU.

Thanks, added.

> >
> SNIP
> >>> diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> >>> new file mode 100644
> >>> index 000000000000..5b45af230368
> >>> --- /dev/null
> >>> +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
> >>> @@ -0,0 +1,336 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >>> +/* Copyright (c) 2023 Isovalent */
> >>> +
> >>> +#include <errno.h>
> >>> +#include <unistd.h>
> >>> +#include <pthread.h>
> >>> +
> >>> +#include <bpf/bpf.h>
> >>> +#include <bpf/libbpf.h>
> >>> +
> >>> +#include <bpf_util.h>
> >>> +#include <test_maps.h>
> >>> +
> >>> +#include "map_percpu_stats.skel.h"
> >>> +
> >>> +#define MAX_ENTRIES 16384
> >>> +#define N_THREADS 37
> >> Why are 37 threads needed here? Would a smaller number of threads work as well?
> > This was used to evict more elements from LRU maps when they are full.
> 
> I see. But in my understanding, for the global LRU list, eviction (the
> invocation of htab_lru_map_delete_node) becomes possible only when the
> number of free elements is less than LOCAL_FREE_TARGET(128) * nr_running_cpus.
> Now the number of free elements is 1000 as defined in __test(), and the
> number of vCPUs is 8 in my local VM setup (BPF CI also uses 8 vCPUs), so it
> is hard to trigger the eviction because 8 * 128 is roughly equal to 1000.
> So I suggest decreasing the number of free elements to 512 and the number
> of threads to 8, or adjusting the number of running threads and free
> elements according to the number of online CPUs.

Yes, that makes sense. I've changed the test to use 8 threads and an offset of 512.
Anton Protopopov July 5, 2023, 3:41 p.m. UTC | #8
On Wed, Jul 05, 2023 at 08:46:09AM +0800, Hou Tao wrote:
> Hi,
> 
> On 7/4/2023 11:23 PM, Anton Protopopov wrote:
> > On Tue, Jul 04, 2023 at 03:02:32PM +0000, Anton Protopopov wrote:
> >> On Tue, Jul 04, 2023 at 10:41:10PM +0800, Hou Tao wrote:
> >>> Hi,
> >>>
> >>> On 6/30/2023 4:25 PM, Anton Protopopov wrote:
> >>> [...]
> >>>> +}
> >>>> +
> >>>> +void test_map_percpu_stats(void)
> >>>> +{
> >>>> +	map_percpu_stats_hash();
> >>>> +	map_percpu_stats_percpu_hash();
> >>>> +	map_percpu_stats_hash_prealloc();
> >>>> +	map_percpu_stats_percpu_hash_prealloc();
> >>>> +	map_percpu_stats_lru_hash();
> >>>> +	map_percpu_stats_percpu_lru_hash();
> >>>> +}
> >>> Please use test__start_subtest() to create multiple subtests.
> > After looking at the code, I think I will leave the individual functions here,
> > as test__start_subtest() is only implemented in test_progs (not test_maps),
> > and adding it here looks out of scope for this patch.
> > .
> I see. But can we just add these tests to test_progs instead, which is
> more flexible?

I think it makes more sense to port this test_progs flexibility to the
test_maps program. I can volunteer to do this (but not right away).
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
new file mode 100644
index 000000000000..5b45af230368
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -0,0 +1,336 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+#include "map_percpu_stats.skel.h"
+
+#define MAX_ENTRIES 16384
+#define N_THREADS 37
+
+#define MAX_MAP_KEY_SIZE 4
+
+static void map_info(int map_fd, struct bpf_map_info *info)
+{
+	__u32 len = sizeof(*info);
+	int ret;
+
+	memset(info, 0, sizeof(*info));
+
+	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
+	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
+}
+
+static const char *map_type_to_s(__u32 type)
+{
+	switch (type) {
+	case BPF_MAP_TYPE_HASH:
+		return "HASH";
+	case BPF_MAP_TYPE_PERCPU_HASH:
+		return "PERCPU_HASH";
+	case BPF_MAP_TYPE_LRU_HASH:
+		return "LRU_HASH";
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+		return "LRU_PERCPU_HASH";
+	default:
+		return "<define-me>";
+	}
+}
+
+/* Map i -> map-type-specific-key */
+static void *map_key(__u32 type, __u32 i)
+{
+	static __thread __u8 key[MAX_MAP_KEY_SIZE];
+
+	*(__u32 *)key = i;
+	return key;
+}
+
+static __u32 map_count_elements(__u32 type, int map_fd)
+{
+	void *key = map_key(type, -1);
+	int n = 0;
+
+	while (!bpf_map_get_next_key(map_fd, key, key))
+		n++;
+	return n;
+}
+
+static void delete_all_elements(__u32 type, int map_fd)
+{
+	void *key = map_key(type, -1);
+	void *keys;
+	int n = 0;
+	int ret;
+
+	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
+	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
+
+	for (; !bpf_map_get_next_key(map_fd, key, key); n++)
+		memcpy(keys + n*MAX_MAP_KEY_SIZE, key, MAX_MAP_KEY_SIZE);
+
+	while (--n >= 0) {
+		ret = bpf_map_delete_elem(map_fd, keys + n*MAX_MAP_KEY_SIZE);
+		CHECK(ret < 0, "bpf_map_delete_elem", "error: %s\n", strerror(errno));
+	}
+}
+
+static bool is_lru(__u32 map_type)
+{
+	return map_type == BPF_MAP_TYPE_LRU_HASH ||
+	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+struct upsert_opts {
+	__u32 map_type;
+	int map_fd;
+	__u32 n;
+};
+
+static void *patch_map_thread(void *arg)
+{
+	struct upsert_opts *opts = arg;
+	void *key;
+	int val;
+	int ret;
+	int i;
+
+	for (i = 0; i < opts->n; i++) {
+		key = map_key(opts->map_type, i);
+		val = rand();
+		ret = bpf_map_update_elem(opts->map_fd, key, &val, 0);
+		CHECK(ret < 0, "bpf_map_update_elem", "error: %s\n", strerror(errno));
+	}
+	return NULL;
+}
+
+static void upsert_elements(struct upsert_opts *opts)
+{
+	pthread_t threads[N_THREADS];
+	int ret;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
+		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
+	}
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		ret = pthread_join(i[threads], NULL);
+		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
+	}
+}
+
+static __u32 read_cur_elements(int iter_fd)
+{
+	char buf[64];
+	ssize_t n;
+	__u32 ret;
+
+	n = read(iter_fd, buf, sizeof(buf)-1);
+	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
+	buf[n] = '\0';
+
+	errno = 0;
+	ret = (__u32)strtol(buf, NULL, 10);
+	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
+
+	return ret;
+}
+
+static __u32 get_cur_elements(int map_id)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct map_percpu_stats *skel;
+	struct bpf_link *link;
+	int iter_fd;
+	int ret;
+
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	skel = map_percpu_stats__open();
+	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
+
+	skel->bss->target_id = map_id;
+
+	ret = map_percpu_stats__load(skel);
+	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
+
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, &opts);
+	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
+
+	return read_cur_elements(iter_fd);
+}
+
+static void __test(int map_fd)
+{
+	__u32 n = MAX_ENTRIES - 1000;
+	__u32 real_current_elements;
+	__u32 iter_current_elements;
+	struct upsert_opts opts = {
+		.map_fd = map_fd,
+		.n = n,
+	};
+	struct bpf_map_info info;
+
+	map_info(map_fd, &info);
+	opts.map_type = info.type;
+
+	/*
+	 * Upsert keys [0, n) under some competition: with random values from
+	 * N_THREADS threads
+	 */
+	upsert_elements(&opts);
+
+	/*
+	 * The sum of percpu elements counters for all hashtable-based maps
+	 * should be equal to the number of elements present in the map. For
+	 * non-lru maps this number should be the number n of upserted
+	 * elements. For lru maps some elements might have been evicted. Check
+	 * that all numbers make sense
+	 */
+	map_info(map_fd, &info);
+	real_current_elements = map_count_elements(info.type, map_fd);
+	if (!is_lru(info.type))
+		CHECK(n != real_current_elements, "map_count_elements",
+		      "real_current_elements(%u) != expected(%u)\n", real_current_elements, n);
+
+	iter_current_elements = get_cur_elements(info.id);
+	CHECK(iter_current_elements != real_current_elements, "get_cur_elements",
+	      "iter_current_elements=%u, expected %u (map_type=%s,map_flags=%08x)\n",
+	      iter_current_elements, real_current_elements, map_type_to_s(info.type), info.map_flags);
+
+	/*
+	 * Cleanup the map and check that all elements are actually gone and
+	 * that the sum of percpu elements counters is back to 0 as well
+	 */
+	delete_all_elements(info.type, map_fd);
+	map_info(map_fd, &info);
+	real_current_elements = map_count_elements(info.type, map_fd);
+	CHECK(real_current_elements, "map_count_elements",
+	      "expected real_current_elements=0, got %u", real_current_elements);
+
+	iter_current_elements = get_cur_elements(info.id);
+	CHECK(iter_current_elements != 0, "get_cur_elements",
+	      "iter_current_elements=%u, expected 0 (map_type=%s,map_flags=%08x)\n",
+	      iter_current_elements, map_type_to_s(info.type), info.map_flags);
+
+	close(map_fd);
+}
+
+static int map_create_opts(__u32 type, const char *name,
+			   struct bpf_map_create_opts *map_opts,
+			   __u32 key_size, __u32 val_size)
+{
+	int map_fd;
+
+	map_fd = bpf_map_create(type, name, key_size, val_size, MAX_ENTRIES, map_opts);
+	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+			strerror(errno), name);
+
+	return map_fd;
+}
+
+static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
+{
+	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
+}
+
+static int create_hash(void)
+{
+	struct bpf_map_create_opts map_opts = {
+		.sz = sizeof(map_opts),
+		.map_flags = BPF_F_NO_PREALLOC,
+	};
+
+	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
+}
+
+static int create_percpu_hash(void)
+{
+	struct bpf_map_create_opts map_opts = {
+		.sz = sizeof(map_opts),
+		.map_flags = BPF_F_NO_PREALLOC,
+	};
+
+	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
+}
+
+static int create_hash_prealloc(void)
+{
+	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
+}
+
+static int create_percpu_hash_prealloc(void)
+{
+	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
+}
+
+static int create_lru_hash(void)
+{
+	return map_create(BPF_MAP_TYPE_LRU_HASH, "lru_hash", NULL);
+}
+
+static int create_percpu_lru_hash(void)
+{
+	return map_create(BPF_MAP_TYPE_LRU_PERCPU_HASH, "lru_hash_percpu", NULL);
+}
+
+static void map_percpu_stats_hash(void)
+{
+	__test(create_hash());
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash(void)
+{
+	__test(create_percpu_hash());
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_prealloc(void)
+{
+	__test(create_hash_prealloc());
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash_prealloc(void)
+{
+	__test(create_percpu_hash_prealloc());
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash(void)
+{
+	__test(create_lru_hash());
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash(void)
+{
+	__test(create_percpu_lru_hash());
+	printf("test_%s:PASS\n", __func__);
+}
+
+void test_map_percpu_stats(void)
+{
+	map_percpu_stats_hash();
+	map_percpu_stats_percpu_hash();
+	map_percpu_stats_hash_prealloc();
+	map_percpu_stats_percpu_hash_prealloc();
+	map_percpu_stats_lru_hash();
+	map_percpu_stats_percpu_lru_hash();
+}
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
new file mode 100644
index 000000000000..10b2325c1720
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -0,0 +1,24 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 target_id;
+
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct bpf_map *map = ctx->map;
+
+	if (map && map->id == target_id)
+		BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";