[bpf-next,1/3] bpf: Introduce local_storage exclusive caching option

Message ID: 20220420002143.1096548-2-davemarchevsky@fb.com (mailing list archive)
State: Changes Requested
Delegated to: BPF
Series: Introduce local_storage exclusive caching

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter warning Series does not have a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1812 this patch: 1812
netdev/cc_maintainers warning 9 maintainers not CCed: songliubraving@fb.com davem@davemloft.net netdev@vger.kernel.org pabeni@redhat.com joannelkoong@gmail.com john.fastabend@gmail.com yhs@fb.com haoluo@google.com kuba@kernel.org
netdev/build_clang fail Errors and warnings before: 200 this patch: 200
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1822 this patch: 1822
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns; WARNING: line length of 82 exceeds 80 columns; WARNING: line length of 84 exceeds 80 columns; WARNING: line length of 85 exceeds 80 columns; WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest + selftests
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on z15 + selftests

Commit Message

Dave Marchevsky April 20, 2022, 12:21 a.m. UTC
Allow local_storage maps to claim exclusive use of a cache slot in
struct bpf_local_storage's cache. When a local_storage map claims a
slot and is cached via bpf_local_storage_lookup, it will not be
replaced until the map is freed. As a result, after a local_storage
map's data is allocated for a specific bpf_local_storage, lookup calls
after the first will quickly find the correct map.
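
For illustration, a map requesting an exclusive slot could be declared
as below. This is a sketch, not part of the patch: the map and value
names are made up, and it assumes libbpf's existing map_extra plumbing
for BTF-defined maps passes the flag through unchanged.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	struct my_val {
		__u64 cnt;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		/* Claim an exclusive cache slot, or fail map creation */
		__uint(map_extra, BPF_LOCAL_STORAGE_FORCE_CACHE);
		__type(key, int);
		__type(value, struct my_val);
	} exclusive_storage SEC(".maps");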

When requesting an exclusive cache slot, bpf_local_storage_cache_idx_get
can now fail if all slots are already claimed. Because a map's cache_idx
is assigned when the bpf_map is allocated - before any program using the
map can run - the failure surfaces at map load time and causes the
subsequent prog load to fail.
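
For example, with the map above compiled into a hypothetical foo.bpf.c
and loaded via a libbpf skeleton (file and skeleton names are made up),
the failure would surface at open_and_load time:

	#include "foo.skel.h"

	struct foo_bpf *skel;

	skel = foo_bpf__open_and_load();
	if (!skel) {
		/* If all BPF_LOCAL_STORAGE_CACHE_SIZE slots of the
		 * relevant cache are already exclusively claimed,
		 * bpf_local_storage_cache_idx_get() returns -ENOMEM, map
		 * creation fails, and the whole load fails here.
		 */
	}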

A bit in struct bpf_map's map_extra is used to designate whether a map
would like to claim an exclusive slot. Similarly, a bitmap,
idx_exclusive, is added to bpf_local_storage_cache to track which slots
are exclusively claimed. Functions that manipulate the cache are
modified to test for the BPF_LOCAL_STORAGE_FORCE_CACHE bit and to
test/set idx_exclusive where necessary.
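
Note that per the bpf_local_storage_map_alloc_check() change below, any
map_extra bit other than BPF_LOCAL_STORAGE_FORCE_CACHE is rejected with
-EINVAL for these map types, e.g. (sketch, hypothetical map name):

	struct {
		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__uint(map_extra, 1U << 1); /* unknown bit -> -EINVAL */
		__type(key, int);
		__type(value, __u64);
	} bad_map SEC(".maps");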

When a map exclusively claims a cache slot, non-exclusive local_storage
maps that were previously assigned the same cache_idx are not migrated
to an unclaimed cache_idx. Such a migration would require a full
iteration of the cache list and necessitate a reverse migration on map
free to even things out. Since a used cache slot is only exclusively
claimed when no empty slot exists, the additional complexity was deemed
unnecessary.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf_local_storage.h |  6 +++--
 include/uapi/linux/bpf.h          | 14 +++++++++++
 kernel/bpf/bpf_inode_storage.c    | 16 +++++++++---
 kernel/bpf/bpf_local_storage.c    | 42 +++++++++++++++++++++++++------
 kernel/bpf/bpf_task_storage.c     | 16 +++++++++---
 kernel/bpf/syscall.c              |  7 ++++--
 net/core/bpf_sk_storage.c         | 15 ++++++++---
 7 files changed, 95 insertions(+), 21 deletions(-)

Patch

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 493e63258497..d87405a1b65d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -109,6 +109,7 @@  struct bpf_local_storage {
 struct bpf_local_storage_cache {
 	spinlock_t idx_lock;
 	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+	DECLARE_BITMAP(idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE);
 };
 
 #define DEFINE_BPF_STORAGE_CACHE(name)				\
@@ -116,9 +117,10 @@  static struct bpf_local_storage_cache name = {			\
 	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx);
+				       u16 idx, u64 flags);
 
 /* Helper functions for bpf_local_storage */
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..566035bc2f08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1257,6 +1257,18 @@  enum bpf_stack_build_id_status {
 	BPF_STACK_BUILD_ID_IP = 2,
 };
 
+/* Flags passed in map_extra when creating local_storage maps
+ * of types: BPF_MAP_TYPE_INODE_STORAGE
+ *           BPF_MAP_TYPE_TASK_STORAGE
+ *           BPF_MAP_TYPE_SK_STORAGE
+ */
+enum bpf_local_storage_extra_flags {
+	/* Give the map exclusive use of a local_storage cache slot
+	 * or fail map alloc
+	 */
+	BPF_LOCAL_STORAGE_FORCE_CACHE = (1U << 0),
+};
+
 #define BPF_BUILD_ID_SIZE 20
 struct bpf_stack_build_id {
 	__s32		status;
@@ -1296,6 +1308,8 @@  union bpf_attr {
 		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
 		 * number of hash functions (if 0, the bloom filter will default
 		 * to using 5 hash functions).
+		 * BPF_MAP_TYPE_{INODE,TASK,SK}_STORAGE - local_storage specific
+		 * flags (see bpf_local_storage_extra_flags)
 		 */
 		__u64	map_extra;
 	};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 96be8d518885..8b32adc23fc3 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -227,12 +227,21 @@  static int notsupp_get_next_key(struct bpf_map *map, void *key,
 static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&inode_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&inode_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -241,7 +250,8 @@  static void inode_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 01aa2b51ec4d..b23080247bef 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -231,12 +231,19 @@  bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 {
 	struct bpf_local_storage_data *sdata;
 	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *cached;
+	bool cached_exclusive = false;
 
 	/* Fast path (cache hit) */
 	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
 				      bpf_rcu_lock_held());
-	if (sdata && rcu_access_pointer(sdata->smap) == smap)
-		return sdata;
+	if (sdata) {
+		if (rcu_access_pointer(sdata->smap) == smap)
+			return sdata;
+
+		cached = rcu_dereference_check(sdata->smap, bpf_rcu_lock_held());
+		cached_exclusive = cached->map.map_extra & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	}
 
 	/* Slow path (cache miss) */
 	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
@@ -248,7 +255,7 @@  bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 		return NULL;
 
 	sdata = SDATA(selem);
-	if (cacheit_lockit) {
+	if (cacheit_lockit && !cached_exclusive) {
 		unsigned long flags;
 
 		/* spinlock is needed to avoid racing with the
@@ -482,15 +489,27 @@  bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	return ERR_PTR(err);
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags)
 {
+	bool exclusive = flags & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	bool adding_to_full = false;
 	u64 min_usage = U64_MAX;
-	u16 i, res = 0;
+	int res = 0;
+	u16 i;
 
 	spin_lock(&cache->idx_lock);
 
+	if (bitmap_full(cache->idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE)) {
+		res = -ENOMEM;
+		adding_to_full = true;
+		if (exclusive)
+			goto out;
+	}
+
 	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
-		if (cache->idx_usage_counts[i] < min_usage) {
+		if ((adding_to_full || !test_bit(i, cache->idx_exclusive)) &&
+		    cache->idx_usage_counts[i] < min_usage) {
 			min_usage = cache->idx_usage_counts[i];
 			res = i;
 
@@ -499,17 +518,23 @@  u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 				break;
 		}
 	}
+
+	if (exclusive)
+		set_bit(res, cache->idx_exclusive);
 	cache->idx_usage_counts[res]++;
 
+out:
 	spin_unlock(&cache->idx_lock);
 
 	return res;
 }
 
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx)
+				      u16 idx, u64 flags)
 {
 	spin_lock(&cache->idx_lock);
+	if (flags & BPF_LOCAL_STORAGE_FORCE_CACHE)
+		clear_bit(idx, cache->idx_exclusive);
 	cache->idx_usage_counts[idx]--;
 	spin_unlock(&cache->idx_lock);
 }
@@ -583,7 +608,8 @@  int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
-	    !attr->btf_key_type_id || !attr->btf_value_type_id)
+	    !attr->btf_key_type_id || !attr->btf_value_type_id ||
+	    attr->map_extra & ~BPF_LOCAL_STORAGE_FORCE_CACHE)
 		return -EINVAL;
 
 	if (!bpf_capable())
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 6638a0ecc3d2..bf7b098d15c9 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -289,12 +289,21 @@  static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
 static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&task_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&task_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -303,7 +312,8 @@  static void task_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e9621cfa09f2..9fd610e53840 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -847,8 +847,11 @@  static int map_create(union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
-	    attr->map_extra != 0)
+	if (!(attr->map_type == BPF_MAP_TYPE_BLOOM_FILTER ||
+	      attr->map_type == BPF_MAP_TYPE_INODE_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_TASK_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_SK_STORAGE) &&
+	     attr->map_extra != 0)
 		return -EINVAL;
 
 	f_flags = bpf_get_file_flag(attr->map_flags);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index e3ac36380520..f6a95f525f50 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -90,19 +90,28 @@  static void bpf_sk_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx, map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&sk_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&sk_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }