
[bpf-next,v7,32/34] bpf: eliminate rlimit-based memory accounting infra for bpf maps

Message ID 20201119173754.4125257-33-guro@fb.com
State New, archived
Series bpf: switch to memcg-based memory accounting

Commit Message

Roman Gushchin Nov. 19, 2020, 5:37 p.m. UTC
Remove the rlimit-based memory accounting infrastructure code, which is
no longer used.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
 include/linux/bpf.h                           | 12 ----
 kernel/bpf/syscall.c                          | 64 +------------------
 .../selftests/bpf/progs/bpf_iter_bpf_map.c    |  2 +-
 .../selftests/bpf/progs/map_ptr_kern.c        |  7 --
 4 files changed, 3 insertions(+), 82 deletions(-)

Comments

Alexei Starovoitov Nov. 21, 2020, 2:52 a.m. UTC | #1
On Thu, Nov 19, 2020 at 09:37:52AM -0800, Roman Gushchin wrote:
>  static void bpf_map_put_uref(struct bpf_map *map)
> @@ -619,7 +562,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
>  		   "value_size:\t%u\n"
>  		   "max_entries:\t%u\n"
>  		   "map_flags:\t%#x\n"
> -		   "memlock:\t%llu\n"
> +		   "memlock:\t%llu\n" /* deprecated */
>  		   "map_id:\t%u\n"
>  		   "frozen:\t%u\n",
>  		   map->map_type,
> @@ -627,7 +570,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
>  		   map->value_size,
>  		   map->max_entries,
>  		   map->map_flags,
> -		   map->memory.pages * 1ULL << PAGE_SHIFT,
> +		   0LLU,

The set looks great to me overall, but the above change is problematic.
There are tools out there that read this value.
Returning zero might cause oncall alarms to trigger.
I think we can be more accurate here.
Instead of zero, the kernel can return
round_up(max_entries * round_up(key_size + value_size, 8), PAGE_SIZE).
It's not the same as before, but at least the numbers won't suddenly
go to zero and comparisons between maps remain meaningful.
Of course we could introduce a page-size-calculating callback per map type,
but imo that would be overkill. These monitoring tools don't care about
the precise number, but rather about the relative value and its growth from
one version of the application to another.

If Daniel doesn't find other issues, this can be fixed in a follow-up.
Roman Gushchin Nov. 21, 2020, 2:59 a.m. UTC | #2
On Fri, Nov 20, 2020 at 06:52:27PM -0800, Alexei Starovoitov wrote:
> On Thu, Nov 19, 2020 at 09:37:52AM -0800, Roman Gushchin wrote:
> >  static void bpf_map_put_uref(struct bpf_map *map)
> > @@ -619,7 +562,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
> >  		   "value_size:\t%u\n"
> >  		   "max_entries:\t%u\n"
> >  		   "map_flags:\t%#x\n"
> > -		   "memlock:\t%llu\n"
> > +		   "memlock:\t%llu\n" /* deprecated */
> >  		   "map_id:\t%u\n"
> >  		   "frozen:\t%u\n",
> >  		   map->map_type,
> > @@ -627,7 +570,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
> >  		   map->value_size,
> >  		   map->max_entries,
> >  		   map->map_flags,
> > -		   map->memory.pages * 1ULL << PAGE_SHIFT,
> > +		   0LLU,
> 
> The set looks great to me overall, but the above change is problematic.
> There are tools out there that read this value.
> Returning zero might cause oncall alarms to trigger.
> I think we can be more accurate here.
> Instead of zero, the kernel can return
> round_up(max_entries * round_up(key_size + value_size, 8), PAGE_SIZE).
> It's not the same as before, but at least the numbers won't suddenly
> go to zero and comparisons between maps remain meaningful.
> Of course we could introduce a page-size-calculating callback per map type,
> but imo that would be overkill. These monitoring tools don't care about
> the precise number, but rather about the relative value and its growth from
> one version of the application to another.
> 
> If Daniel doesn't find other issues, this can be fixed in a follow-up.

Makes total sense. I'll prepare a follow-up patch.

Thanks!
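
For reference, a minimal sketch of what such a follow-up could look like.
The helper name bpf_map_memory_footprint() is an assumption for
illustration, not necessarily what the actual follow-up patch uses:

/* Sketch only: approximate the map's memory footprint from its
 * creation parameters alone, so fdinfo keeps reporting a non-zero,
 * comparable memlock value instead of a constant zero.
 */
static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
{
	unsigned long size;

	/* Per-entry size: key plus value, rounded up to 8 bytes. */
	size = round_up(map->key_size + map->value_size, 8);

	/* Total over all entries, rounded up to whole pages. */
	return round_up(map->max_entries * size, PAGE_SIZE);
}

bpf_map_show_fdinfo() would then print bpf_map_memory_footprint(map) in
place of the 0LLU constant below. For example, a map with key_size=4,
value_size=8 and max_entries=1000 would report
round_up(1000 * 16, 4096) = 16384 bytes.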

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b11436cb9e3d..c9322adedd50 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -137,11 +137,6 @@  struct bpf_map_ops {
 	const struct bpf_iter_seq_info *iter_seq_info;
 };
 
-struct bpf_map_memory {
-	u32 pages;
-	struct user_struct *user;
-};
-
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -162,7 +157,6 @@  struct bpf_map {
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
-	struct bpf_map_memory memory;
 #ifdef CONFIG_MEMCG_KMEM
 	struct mem_cgroup *memcg;
 #endif
@@ -1223,12 +1217,6 @@  void bpf_map_inc_with_uref(struct bpf_map *map);
 struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size);
-void bpf_map_charge_finish(struct bpf_map_memory *mem);
-void bpf_map_charge_move(struct bpf_map_memory *dst,
-			 struct bpf_map_memory *src);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4b514bb5cd70..f065121d6ffa 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -360,60 +360,6 @@  static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 		atomic_long_sub(pages, &user->locked_vm);
 }
 
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
-{
-	u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
-	struct user_struct *user;
-	int ret;
-
-	if (size >= U32_MAX - PAGE_SIZE)
-		return -E2BIG;
-
-	user = get_current_user();
-	ret = bpf_charge_memlock(user, pages);
-	if (ret) {
-		free_uid(user);
-		return ret;
-	}
-
-	mem->pages = pages;
-	mem->user = user;
-
-	return 0;
-}
-
-void bpf_map_charge_finish(struct bpf_map_memory *mem)
-{
-	bpf_uncharge_memlock(mem->user, mem->pages);
-	free_uid(mem->user);
-}
-
-void bpf_map_charge_move(struct bpf_map_memory *dst,
-			 struct bpf_map_memory *src)
-{
-	*dst = *src;
-
-	/* Make sure src will not be used for the redundant uncharging. */
-	memset(src, 0, sizeof(struct bpf_map_memory));
-}
-
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
-{
-	int ret;
-
-	ret = bpf_charge_memlock(map->memory.user, pages);
-	if (ret)
-		return ret;
-	map->memory.pages += pages;
-	return ret;
-}
-
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
-{
-	bpf_uncharge_memlock(map->memory.user, pages);
-	map->memory.pages -= pages;
-}
-
 static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
@@ -533,14 +479,11 @@  static void bpf_map_release_memcg(struct bpf_map *map)
 static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
-	struct bpf_map_memory mem;
 
-	bpf_map_charge_move(&mem, &map->memory);
 	security_bpf_map_free(map);
 	bpf_map_release_memcg(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
-	bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -619,7 +562,7 @@  static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "value_size:\t%u\n"
 		   "max_entries:\t%u\n"
 		   "map_flags:\t%#x\n"
-		   "memlock:\t%llu\n"
+		   "memlock:\t%llu\n" /* deprecated */
 		   "map_id:\t%u\n"
 		   "frozen:\t%u\n",
 		   map->map_type,
@@ -627,7 +570,7 @@  static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   map->value_size,
 		   map->max_entries,
 		   map->map_flags,
-		   map->memory.pages * 1ULL << PAGE_SHIFT,
+		   0LLU,
 		   map->id,
 		   READ_ONCE(map->frozen));
 	if (type) {
@@ -870,7 +813,6 @@  static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 static int map_create(union bpf_attr *attr)
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
-	struct bpf_map_memory mem;
 	struct bpf_map *map;
 	int f_flags;
 	int err;
@@ -969,9 +911,7 @@  static int map_create(union bpf_attr *attr)
 	security_bpf_map_free(map);
 free_map:
 	btf_put(map->btf);
-	bpf_map_charge_move(&mem, &map->memory);
 	map->ops->map_free(map);
-	bpf_map_charge_finish(&mem);
 	return err;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index 08651b23edba..b83b5d2e17dc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -23,6 +23,6 @@  int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 
 	BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
 		       map->usercnt.counter,
-		       map->memory.user->locked_vm.counter);
+		       0LLU);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index c325405751e2..d8850bc6a9f1 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -26,17 +26,12 @@  __u32 g_line = 0;
 		return 0;	\
 })
 
-struct bpf_map_memory {
-	__u32 pages;
-} __attribute__((preserve_access_index));
-
 struct bpf_map {
 	enum bpf_map_type map_type;
 	__u32 key_size;
 	__u32 value_size;
 	__u32 max_entries;
 	__u32 id;
-	struct bpf_map_memory memory;
 } __attribute__((preserve_access_index));
 
 static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
@@ -47,7 +42,6 @@  static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
 	VERIFY(map->value_size == value_size);
 	VERIFY(map->max_entries == max_entries);
 	VERIFY(map->id > 0);
-	VERIFY(map->memory.pages > 0);
 
 	return 1;
 }
@@ -60,7 +54,6 @@  static inline int check_bpf_map_ptr(struct bpf_map *indirect,
 	VERIFY(indirect->value_size == direct->value_size);
 	VERIFY(indirect->max_entries == direct->max_entries);
 	VERIFY(indirect->id == direct->id);
-	VERIFY(indirect->memory.pages == direct->memory.pages);
 
 	return 1;
 }