[RFC,bpf-next,v3,5/6] bpf: Add two module parameters in htab for memory benchmark

Message ID 20230429101215.111262-6-houtao@huaweicloud.com (mailing list archive)
State RFC
Delegated to: BPF
Series Handle immediate reuse in bpf memory allocator

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api fail Found: 'module_param' was: 0 now: 2
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Hou Tao April 29, 2023, 10:12 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Add two module parameters to htab:
* reuse_flag: possible values are 0, 2 (REUSE_AFTER_RCU_GP) or
  4 (FREE_AFTER_RCU_GP). The default value is 0, which creates a hash
  map that reuses freed elements immediately.
* delayed_free: possible values are 0 and 1. The default value is 0,
  in which case the hash map calls bpf_mem_cache_free() directly. If
  the value is 1, the hash map calls bpf_mem_cache_free() after one RCU
  grace period, mimicking how bpf_cpumask is freed (see the sketch
  after this list).
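
The deferred-free pattern that delayed_free=1 enables looks roughly like
the following. This is a simplified sketch with illustrative names
(struct obj, obj_ma, obj_release); it is not the actual bpf_cpumask
code, though the patch below applies the same shape to htab_elem:

	struct obj {
		struct rcu_head rcu;
		/* ... object payload ... */
	};

	static struct bpf_mem_alloc obj_ma;

	/* RCU callback: runs only after one RCU grace period has elapsed */
	static void obj_free_rcu(struct rcu_head *rcu)
	{
		struct obj *o = container_of(rcu, struct obj, rcu);

		bpf_mem_cache_free(&obj_ma, o);
	}

	static void obj_release(struct obj *o)
	{
		/* defer the free past a grace period instead of freeing now */
		call_rcu(&o->rcu, obj_free_rcu);
	}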

These two module parameters are for benchmarking purposes only and are
not intended for merging.
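
For reference, since hashtab.c is built into the kernel rather than
being a loadable module, the parameters would presumably be set on the
kernel command line or via sysfs. A sketch, assuming the KBUILD_MODNAME
for kernel/bpf/hashtab.o is "hashtab":

	# on the kernel command line
	hashtab.reuse_flag=2 hashtab.delayed_free=1

	# or at runtime, through the 0644 entries created by module_param()
	echo 2 > /sys/module/hashtab/parameters/reuse_flag
	echo 1 > /sys/module/hashtab/parameters/delayed_free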

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/hashtab.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

Patch

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 93009b94ac9b..8502957b8bcc 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -100,6 +100,7 @@  struct bpf_htab {
 	struct percpu_counter pcount;
 	atomic_t count;
 	bool use_percpu_counter;
+	bool delayed_free;
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
@@ -120,14 +121,24 @@  struct htab_elem {
 		};
 	};
 	union {
-		/* pointer to per-cpu pointer */
-		void *ptr_to_pptr;
+		struct {
+			/* pointer to per-cpu pointer */
+			void *ptr_to_pptr;
+			struct bpf_mem_alloc *ma;
+			struct rcu_head rcu;
+		};
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
 	char key[] __aligned(8);
 };
 
+static int reuse_flag;
+module_param(reuse_flag, int, 0644);
+
+static bool delayed_free;
+module_param(delayed_free, bool, 0644);
+
 static inline bool htab_is_prealloc(const struct bpf_htab *htab)
 {
 	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
@@ -539,6 +550,7 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 	htab_init_buckets(htab);
 
+	htab->delayed_free = delayed_free;
 /* compute_batch_value() computes batch value as num_online_cpus() * 2
  * and __percpu_counter_compare() needs
  * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus()
@@ -576,7 +588,7 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 				goto free_prealloc;
 		}
 	} else {
-		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, 0);
+		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, reuse_flag);
 		if (err)
 			goto free_map_locked;
 		if (percpu) {
@@ -878,12 +890,24 @@  static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return -ENOENT;
 }
 
-static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
+static void htab_elem_free_rcu(struct rcu_head *rcu)
+{
+	struct htab_elem *l = container_of(rcu, struct htab_elem, rcu);
+
+	bpf_mem_cache_free(l->ma, l);
+}
+
+static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l, bool destroy)
 {
 	check_and_free_fields(htab, l);
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
-	bpf_mem_cache_free(&htab->ma, l);
+	if (destroy || !htab->delayed_free) {
+		bpf_mem_cache_free(&htab->ma, l);
+		return;
+	}
+	l->ma = &htab->ma;
+	call_rcu(&l->rcu, htab_elem_free_rcu);
 }
 
 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
@@ -931,7 +955,7 @@  static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 		__pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		dec_elem_count(htab);
-		htab_elem_free(htab, l);
+		htab_elem_free(htab, l, false);
 	}
 }
 
@@ -1468,7 +1492,7 @@  static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			htab_elem_free(htab, l);
+			htab_elem_free(htab, l, true);
 		}
 	}
 	migrate_enable();
@@ -1522,6 +1546,8 @@  static void htab_map_free(struct bpf_map *map)
 	 * during bpf_mem_alloc_destroy().
 	 */
 	if (!htab_is_prealloc(htab)) {
+		if (htab->delayed_free)
+			rcu_barrier();
 		delete_all_elements(htab);
 	} else {
 		htab_free_prealloced_fields(htab);