From patchwork Thu Oct 24 07:48:14 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Namhyung Kim X-Patchwork-Id: 13848475 X-Patchwork-Delegate: bpf@iogearbox.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3EEB9171E6E; Thu, 24 Oct 2024 07:48:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729756097; cv=none; b=KNkRbvcgouq9ijn/FqN8Lfdg+Zv5UQ+KaCzXVisILqxR8/jwdpATC6xoZl5lZvyLBzcc3C4EmeimGHdydwmYJrwyc0WsX5dWESj9cKOaEJf8UKDqTqCLFEbmKj73XdiYdoJmv2dzP3tcA5157fHd4Whdzh4G7uq5oThOD1WOzXc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729756097; c=relaxed/simple; bh=Uwe0jVobKIKamY27y1nMKy0mrRAseQCvrFz6/ckVHmc=; h=From:To:Cc:Subject:Date:Message-ID:MIME-Version; b=XaBDzpozIPeLU2fcnu26iKKMtwZ2XD+0PEZ8LHLJy7dwTfwlSEYlBP1I0XmMxQuO2l493hs9Ihcbiqroyrn26ynNP5RdIhOljnYPV0TC6THTH+B+HXQjTz8oj1gXL+UEfG8pFm5iT/Fm34zsKFM5if64j5m7pjxfiOaCCDDVqMo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ciHCbXKO; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ciHCbXKO" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 40702C4CECC; Thu, 24 Oct 2024 07:48:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729756097; bh=Uwe0jVobKIKamY27y1nMKy0mrRAseQCvrFz6/ckVHmc=; h=From:To:Cc:Subject:Date:From; b=ciHCbXKOIYP1Me1h7W/WxW4ObhwCbsNFjfdXFbt4J6Qg3j28abMdGmRFXRvhhysO1 zuI1YzZm5TZpd3o32F0UBzexA+fBj+5+Ro1NEH0EHCL5XXATVMDZtcxQCwgSvSIOf5 EQBbhkw585/wb5SMhvuPhxWubHOLEfH/q6IwrJL+L6ze5Q15AemFtcKX+rKbodUuCn c4XQg9lxCc/0yKiMXk7/56JJ4r4HJoQGK9LrrZgULUjdiEBB0AQUnZ7/yE71CCiuNK qb7ZaTMhxOLMSAHjiNsfsr5Vm2rw8VmqhWsjZOykPw2UCi3pXFWL1r7XjUf832jRDG vpXyFN5pB5TTw== From: Namhyung Kim To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko Cc: Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , LKML , bpf@vger.kernel.org, Andrew Morton , Christoph Lameter , Pekka Enberg , David Rientjes , Joonsoo Kim , Vlastimil Babka , Roman Gushchin , Hyeonggon Yoo <42.hyeyoo@gmail.com>, linux-mm@kvack.org, Arnaldo Carvalho de Melo , Kees Cook Subject: [PATCH v2 bpf-next 1/2] bpf: Add open coded version of kmem_cache iterator Date: Thu, 24 Oct 2024 00:48:14 -0700 Message-ID: <20241024074815.1255066-1-namhyung@kernel.org> X-Mailer: git-send-email 2.47.0.105.g07ac214952-goog Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Add a new open coded iterator for kmem_cache which can be called from a BPF program like below. It doesn't take any argument and traverses all kmem_cache entries. struct kmem_cache *pos; bpf_for_each(kmem_cache, pos) { ... } As it needs to grab slab_mutex, it should be called from sleepable BPF programs only. Also update the existing iterator code to use the open coded version internally as suggested by Andrii. Signed-off-by: Namhyung Kim --- v2) * prevent restart after the last element (Martin) * update existing code to use the open coded version (Andrii) kernel/bpf/helpers.c | 3 + kernel/bpf/kmem_cache_iter.c | 151 +++++++++++++++++++++++++---------- 2 files changed, 110 insertions(+), 44 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5c3fdb29c1b1fe53..ddddb060835bac4b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3112,6 +3112,9 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_get_kmem_cache) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/kmem_cache_iter.c b/kernel/bpf/kmem_cache_iter.c index ebc101d7da51b57c..3ae2158d767f4526 100644 --- a/kernel/bpf/kmem_cache_iter.c +++ b/kernel/bpf/kmem_cache_iter.c @@ -8,16 +8,116 @@ #include "../../mm/slab.h" /* kmem_cache, slab_caches and slab_mutex */ +/* open-coded version */ +struct bpf_iter_kmem_cache { + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +struct bpf_iter_kmem_cache_kern { + struct kmem_cache *pos; +} __attribute__((aligned(8))); + +#define KMEM_CACHE_POS_START ((void *)1L) + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) +{ + struct bpf_iter_kmem_cache_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it)); + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it)); + + kit->pos = KMEM_CACHE_POS_START; + return 0; +} + +__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) +{ + struct bpf_iter_kmem_cache_kern *kit = (void *)it; + struct kmem_cache *prev = kit->pos; + struct kmem_cache *next; + bool destroy = false; + + if (!prev) + return NULL; + + mutex_lock(&slab_mutex); + + if (list_empty(&slab_caches)) { + mutex_unlock(&slab_mutex); + return NULL; + } + + if (prev == KMEM_CACHE_POS_START) + next = list_first_entry(&slab_caches, struct kmem_cache, list); + else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev) + next = NULL; + else + next = list_next_entry(prev, list); + + /* boot_caches have negative refcount, don't touch them */ + if (next && next->refcount > 0) + next->refcount++; + + /* Skip kmem_cache_destroy() for active entries */ + if (prev && prev != KMEM_CACHE_POS_START) { + if (prev->refcount > 1) + prev->refcount--; + else if (prev->refcount == 1) + destroy = true; + } + + mutex_unlock(&slab_mutex); + + if (destroy) + kmem_cache_destroy(prev); + + kit->pos = next; + return next; +} + +__bpf_kfunc void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) +{ + struct bpf_iter_kmem_cache_kern *kit = (void *)it; + struct kmem_cache *s = kit->pos; + bool destroy = false; + + if (s == NULL || s == KMEM_CACHE_POS_START) + return; + + mutex_lock(&slab_mutex); + + /* Skip kmem_cache_destroy() for active entries */ + if (s->refcount > 1) + s->refcount--; + else if (s->refcount == 1) + destroy = true; + + mutex_unlock(&slab_mutex); + + if (destroy) + kmem_cache_destroy(s); +} + +__bpf_kfunc_end_defs(); + struct bpf_iter__kmem_cache { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct kmem_cache *, s); }; +union kmem_cache_iter_priv { + struct bpf_iter_kmem_cache it; + struct bpf_iter_kmem_cache_kern kit; +}; + static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos) { loff_t cnt = 0; bool found = false; struct kmem_cache *s; + union kmem_cache_iter_priv *p = seq->private; mutex_lock(&slab_mutex); @@ -43,8 +143,9 @@ static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos) mutex_unlock(&slab_mutex); if (!found) - return NULL; + s = NULL; + p->kit.pos = s; return s; } @@ -55,63 +156,24 @@ static void kmem_cache_iter_seq_stop(struct seq_file *seq, void *v) .meta = &meta, .s = v, }; + union kmem_cache_iter_priv *p = seq->private; struct bpf_prog *prog; - bool destroy = false; meta.seq = seq; prog = bpf_iter_get_info(&meta, true); if (prog && !ctx.s) bpf_iter_run_prog(prog, &ctx); - if (ctx.s == NULL) - return; - - mutex_lock(&slab_mutex); - - /* Skip kmem_cache_destroy() for active entries */ - if (ctx.s->refcount > 1) - ctx.s->refcount--; - else if (ctx.s->refcount == 1) - destroy = true; - - mutex_unlock(&slab_mutex); - - if (destroy) - kmem_cache_destroy(ctx.s); + bpf_iter_kmem_cache_destroy(&p->it); } static void *kmem_cache_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct kmem_cache *s = v; - struct kmem_cache *next = NULL; - bool destroy = false; + union kmem_cache_iter_priv *p = seq->private; ++*pos; - mutex_lock(&slab_mutex); - - if (list_last_entry(&slab_caches, struct kmem_cache, list) != s) { - next = list_next_entry(s, list); - - WARN_ON_ONCE(next->refcount == 0); - - /* boot_caches have negative refcount, don't touch them */ - if (next->refcount > 0) - next->refcount++; - } - - /* Skip kmem_cache_destroy() for active entries */ - if (s->refcount > 1) - s->refcount--; - else if (s->refcount == 1) - destroy = true; - - mutex_unlock(&slab_mutex); - - if (destroy) - kmem_cache_destroy(s); - - return next; + return bpf_iter_kmem_cache_next(&p->it); } static int kmem_cache_iter_seq_show(struct seq_file *seq, void *v) @@ -143,6 +205,7 @@ BTF_ID_LIST_GLOBAL_SINGLE(bpf_kmem_cache_btf_id, struct, kmem_cache) static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = { .seq_ops = &kmem_cache_iter_seq_ops, + .seq_priv_size = sizeof(union kmem_cache_iter_priv), }; static void bpf_iter_kmem_cache_show_fdinfo(const struct bpf_iter_aux_info *aux, From patchwork Thu Oct 24 07:48:15 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Namhyung Kim X-Patchwork-Id: 13848476 X-Patchwork-Delegate: bpf@iogearbox.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 939C31ADFE3; Thu, 24 Oct 2024 07:48:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729756098; cv=none; b=AIHOsHock13f729EjkPWBCWQmuRDMbSQJ4ybHeh9Cf/7T01pFidtkR040/4Zwhw1ECszm9Ah+NYWABZoVvhZ3i2d7eboRSq3gXGlX/omEDtRtcK+of4sfW3F848DzD0EUI+hFAEF/j7AMLiGyuSCrRXHnlgLd7MwqFOTAZRLuYs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729756098; c=relaxed/simple; bh=KdTfSCDjkkqD9bhaV9lSbcv8lSsiidIcVwYLU1/0JAQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Gba65mVjCbe+b45iGWcmJ69wqeprVv0q6MqIfHpg65aFvRoti20fvykwwlViPaladp+21KWtYW24DQA+cVA4+geIXxH+jCbZpsKdYEFkIdY7fwrwjAt1FQ973wGxhXBVdOwt+37VHp2vlAfzE/mgSplB5gW4QKmbTk3nyXxqibo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=mA6rk5G3; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="mA6rk5G3" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3E0E6C4AF11; Thu, 24 Oct 2024 07:48:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729756098; bh=KdTfSCDjkkqD9bhaV9lSbcv8lSsiidIcVwYLU1/0JAQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=mA6rk5G3BGxp468AygJtj6VOjozKki9/lKuCqmUT91hD8jmQD8QvPqfluHvvGz4dV zLCIMhwYXsTgxeWQeb4bKbxOOn2DFu2BcEMkg3T4VDDPup3/UapJeX8li+Kd0Ac3kl EnCYJ2XNHaYUBwQ8FAf5HthmVf59uCYX/qelabnWho6I+6qroQjPsSgJwDz/p0tMZ+ sUJ7SuBGuoUSZXwIPXBn7Rx5FAFoLCXiLC57Md2STUL0ZCyL6Rh/63iICV3Y7/Utuv 9o4uMc1+xbCsaML7WJi0srwZXiLhfloZqnX8YByDhKOBeab1IoYlEucCeew/Y41bUo 3I3lNoFUrnC6Q== From: Namhyung Kim To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko Cc: Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , LKML , bpf@vger.kernel.org, Andrew Morton , Christoph Lameter , Pekka Enberg , David Rientjes , Joonsoo Kim , Vlastimil Babka , Roman Gushchin , Hyeonggon Yoo <42.hyeyoo@gmail.com>, linux-mm@kvack.org, Arnaldo Carvalho de Melo , Kees Cook Subject: [PATCH v2 bpf-next 2/2] selftests/bpf: Add a test for open coded kmem_cache iter Date: Thu, 24 Oct 2024 00:48:15 -0700 Message-ID: <20241024074815.1255066-2-namhyung@kernel.org> X-Mailer: git-send-email 2.47.0.105.g07ac214952-goog In-Reply-To: <20241024074815.1255066-1-namhyung@kernel.org> References: <20241024074815.1255066-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net The new subtest is attached to sleepable fentry of syncfs() syscall. It iterates the kmem_cache using bpf_for_each loop and count the number of entries. Finally it checks it with the number of entries from the regular iterator. $ ./vmtest.sh -- ./test_progs -t kmem_cache_iter ... #130/1 kmem_cache_iter/check_task_struct:OK #130/2 kmem_cache_iter/check_slabinfo:OK #130/3 kmem_cache_iter/open_coded_iter:OK #130 kmem_cache_iter:OK Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED Also simplify the code by using attach routine of the skeleton. Signed-off-by: Namhyung Kim --- v2) * remove unnecessary detach (Martin) * check pid in syncfs to prevent surprise (Martin) * remove unnecessary local variable (Andrii) .../testing/selftests/bpf/bpf_experimental.h | 6 ++++ .../bpf/prog_tests/kmem_cache_iter.c | 28 +++++++++++-------- .../selftests/bpf/progs/kmem_cache_iter.c | 28 +++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index b0668f29f7b394eb..cd8ecd39c3f3c68d 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -582,4 +582,10 @@ extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) + +struct bpf_iter_kmem_cache; +extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 848d8fc9171fae45..778b55bc1f912b98 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -68,12 +68,20 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel) fclose(fp); } +static void subtest_kmem_cache_iter_open_coded(struct kmem_cache_iter *skel) +{ + skel->bss->tgid = getpid(); + + /* To trigger the open coded iterator attached to the syscall */ + syncfs(0); + + /* It should be same as we've seen from the explicit iterator */ + ASSERT_EQ(skel->bss->open_coded_seen, skel->bss->kmem_cache_seen, "open_code_seen_eq"); +} + void test_kmem_cache_iter(void) { - DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct kmem_cache_iter *skel = NULL; - union bpf_iter_link_info linfo = {}; - struct bpf_link *link; char buf[256]; int iter_fd; @@ -81,16 +89,12 @@ void test_kmem_cache_iter(void) if (!ASSERT_OK_PTR(skel, "kmem_cache_iter__open_and_load")) return; - opts.link_info = &linfo; - opts.link_info_len = sizeof(linfo); - - link = bpf_program__attach_iter(skel->progs.slab_info_collector, &opts); - if (!ASSERT_OK_PTR(link, "attach_iter")) + if (!ASSERT_OK(kmem_cache_iter__attach(skel), "skel_attach")) goto destroy; - iter_fd = bpf_iter_create(bpf_link__fd(link)); + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.slab_info_collector)); if (!ASSERT_GE(iter_fd, 0, "iter_create")) - goto free_link; + goto destroy; memset(buf, 0, sizeof(buf)); while (read(iter_fd, buf, sizeof(buf) > 0)) { @@ -105,11 +109,11 @@ void test_kmem_cache_iter(void) subtest_kmem_cache_iter_check_task_struct(skel); if (test__start_subtest("check_slabinfo")) subtest_kmem_cache_iter_check_slabinfo(skel); + if (test__start_subtest("open_coded_iter")) + subtest_kmem_cache_iter_open_coded(skel); close(iter_fd); -free_link: - bpf_link__destroy(link); destroy: kmem_cache_iter__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c index 72c9dafecd98406b..e62807caa7593604 100644 --- a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c @@ -2,6 +2,8 @@ /* Copyright (c) 2024 Google */ #include "bpf_iter.h" +#include "bpf_experimental.h" +#include "bpf_misc.h" #include #include @@ -30,9 +32,12 @@ struct { extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym; +unsigned int tgid; + /* Result, will be checked by userspace */ int task_struct_found; int kmem_cache_seen; +int open_coded_seen; SEC("iter/kmem_cache") int slab_info_collector(struct bpf_iter__kmem_cache *ctx) @@ -85,3 +90,26 @@ int BPF_PROG(check_task_struct) task_struct_found = -2; return 0; } + +SEC("fentry.s/" SYS_PREFIX "sys_syncfs") +int open_coded_iter(const void *ctx) +{ + struct kmem_cache *s; + + if (tgid != bpf_get_current_pid_tgid() >> 32) + return 0; + + bpf_for_each(kmem_cache, s) { + struct kmem_cache_result *r; + + r = bpf_map_lookup_elem(&slab_result, &open_coded_seen); + if (!r) + break; + + open_coded_seen++; + + if (r->obj_size != s->size) + break; + } + return 0; +}