[RFC,bpf-next,05/10] mm: Add helper to recharge kmalloc'ed address

Message ID 20220619155032.32515-6-laoar.shao@gmail.com (mailing list archive)
State New
Series bpf, mm: Recharge pages when reuse bpf map

Commit Message

Yafang Shao June 19, 2022, 3:50 p.m. UTC
This patch introduces a helper to recharge the corresponding pages of a
given kmalloc'ed address. The recharge is divided into three steps:
  - pre charge to the new memcg
    This makes sure that once we uncharge from the old memcg, the charge
    to the new memcg always succeeds. If we can't pre charge to the new
    memcg, we don't allow the object to be uncharged from the old memcg.
  - uncharge from the old memcg
    After the pre charge to the new memcg succeeds, we can uncharge from
    the old memcg.
  - post charge to the new memcg
    Update the counters of the new memcg.

Sometimes we may want to recharge many kmalloc'ed addresses to the same
memcg. In that case we should pre charge all of these addresses first,
then do the uncharge, and finally do the post charge. It may happen that,
after successfully pre charging some addresses, we fail to pre charge a
new one; we then have to cancel the pre charges that already succeeded,
so MEMCG_KMEM_CHARGE_ERR is introduced for this purpose. A caller-side
sketch of this sequence is shown below.
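
For illustration only, here is a minimal caller-side sketch of the four
steps. It is not part of the patch, and recharge_batch(), ptrs and nr are
hypothetical names:

	/*
	 * Hypothetical example, not part of this patch: recharge a batch
	 * of kmalloc'ed objects to the current task's memcg.
	 */
	static bool recharge_batch(void **ptrs, int nr)
	{
		int i;

		/* Step 1: pre charge every object to the new memcg. */
		for (i = 0; i < nr; i++) {
			if (!krecharge(ptrs[i], MEMCG_KMEM_PRE_CHARGE))
				goto err;
		}

		/* Step 2: uncharge every object from its old memcg. */
		for (i = 0; i < nr; i++)
			krecharge(ptrs[i], MEMCG_KMEM_UNCHARGE);

		/* Step 3: commit the charge to the new memcg. */
		for (i = 0; i < nr; i++)
			krecharge(ptrs[i], MEMCG_KMEM_POST_CHARGE);

		return true;

	err:
		/* Cancel the pre charges that already succeeded. */
		while (--i >= 0)
			krecharge(ptrs[i], MEMCG_KMEM_CHARGE_ERR);
		return false;
	}

Only the pre charge step can fail; the uncharge and post charge steps are
infallible once the pre charge has succeeded.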

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/slab.h |  17 ++++++
 mm/slab.c            |  85 +++++++++++++++++++++++++++++
 mm/slob.c            |   7 +++
 mm/slub.c            | 125 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)

Patch

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d..18ab30aa8fe8 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -194,6 +194,23 @@  bool kmem_valid_obj(void *object);
 void kmem_dump_obj(void *object);
 #endif
 
+/*
+ * The recharge will be separated into three steps:
+ *	MEMCG_KMEM_PRE_CHARGE  : pre charge to the new memcg
+ *	MEMCG_KMEM_UNCHARGE    : uncharge from the old memcg
+ *	MEMCG_KMEM_POST_CHARGE : post charge to the new memcg
+ * and an error handler:
+ *	MEMCG_KMEM_CHARGE_ERR  : in the pre charge step we may succeed
+ *	                         in charging some objp's but then fail
+ *	                         to charge a new one; in that case we
+ *	                         must uncharge the already charged objp's.
+ */
+#define MEMCG_KMEM_PRE_CHARGE	0
+#define MEMCG_KMEM_UNCHARGE	1
+#define MEMCG_KMEM_POST_CHARGE	2
+#define MEMCG_KMEM_CHARGE_ERR	3
+bool krecharge(const void *objp, int step);
+
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..4795014edd30 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3798,6 +3798,91 @@  void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
+bool krecharge(const void *objp, int step)
+{
+	void *object = (void *)objp;
+	struct obj_cgroup *objcg_old;
+	struct obj_cgroup *objcg_new;
+	struct obj_cgroup **objcgs;
+	struct kmem_cache *s;
+	struct slab *slab;
+	unsigned long flags;
+	unsigned int off;
+
+	WARN_ON(!in_task());
+
+	if (unlikely(ZERO_OR_NULL_PTR(objp)))
+		return true;
+
+	if (!memcg_kmem_enabled())
+		return true;
+
+	local_irq_save(flags);
+	s = virt_to_cache(objp);
+	if (!s)
+		goto out;
+
+	if (!(s->flags & SLAB_ACCOUNT))
+		goto out;
+
+	slab = virt_to_slab(object);
+	if (!slab)
+		goto out;
+
+	objcgs = slab_objcgs(slab);
+	if (!objcgs)
+		goto out;
+
+	off = obj_to_index(s, slab, object);
+	objcg_old = objcgs[off];
+	if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+		goto out;
+
+	/*
+	 *  The recharge can be separated into three steps,
+	 *  1. Pre charge to the new memcg
+	 *  2. Uncharge from the old memcg
+	 *  3. Charge to the new memcg
+	 */
+	switch (step) {
+	case MEMCG_KMEM_PRE_CHARGE:
+		/* Pre charge to the new memcg */
+		objcg_new = get_obj_cgroup_from_current();
+		WARN_ON(!objcg_new);
+		if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+			obj_cgroup_put(objcg_new);
+			local_irq_restore(flags);
+			return false;
+		}
+		break;
+	case MEMCG_KMEM_UNCHARGE:
+		/* Uncharge from the old memcg */
+		obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+		objcgs[off] = NULL;
+		mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg_old);
+		break;
+	case MEMCG_KMEM_POST_CHARGE:
+		/* Charge to the new memcg */
+		objcg_new = obj_cgroup_from_current();
+		objcgs[off] = objcg_new;
+		mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+		break;
+	case MEMCG_KMEM_CHARGE_ERR:
+		objcg_new = obj_cgroup_from_current();
+		obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+		obj_cgroup_put(objcg_new);
+		break;
+	}
+
+out:
+	local_irq_restore(flags);
+
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 /*
  * This initializes kmem_cache_node or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..6d68ad57b4a2 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -574,6 +574,13 @@  void kfree(const void *block)
 }
 EXPORT_SYMBOL(kfree);
 
+/* kmemcg is not supported by SLOB */
+bool krecharge(const void *block, int step)
+{
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
 size_t __ksize(const void *block)
 {
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..ef6475ed6407 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4556,6 +4556,131 @@  void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+bool krecharge(const void *x, int step)
+{
+	void *object = (void *)x;
+	struct obj_cgroup *objcg_old;
+	struct obj_cgroup *objcg_new;
+	struct obj_cgroup **objcgs;
+	struct kmem_cache *s;
+	struct folio *folio;
+	struct slab *slab;
+	unsigned int off;
+
+	WARN_ON(!in_task());
+
+	if (!memcg_kmem_enabled())
+		return true;
+
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
+		return true;
+
+	folio = virt_to_folio(x);
+	if (unlikely(!folio_test_slab(folio))) {
+		unsigned int order = folio_order(folio);
+		struct page *page;
+
+		switch (step) {
+		case MEMCG_KMEM_PRE_CHARGE:
+			objcg_new = get_obj_cgroup_from_current();
+			WARN_ON(!objcg_new);
+			/* Try charge current memcg */
+			if (obj_cgroup_charge_pages(objcg_new, GFP_KERNEL,
+						    1 << order)) {
+				obj_cgroup_put(objcg_new);
+				return false;
+			}
+			break;
+		case MEMCG_KMEM_UNCHARGE:
+			/* Uncharge folio memcg */
+			objcg_old = __folio_objcg(folio);
+			page = folio_page(folio, 0);
+			WARN_ON(!objcg_old);
+			obj_cgroup_uncharge_pages(objcg_old, 1 << order);
+			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+						-(PAGE_SIZE << order));
+			page->memcg_data = 0;
+			obj_cgroup_put(objcg_old);
+			break;
+		case MEMCG_KMEM_POST_CHARGE:
+			/* Set the current memcg on the folio's page */
+			objcg_new = obj_cgroup_from_current();
+			page = folio_page(folio, 0);
+			page->memcg_data = (unsigned long)objcg_new | MEMCG_DATA_KMEM;
+			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+						PAGE_SIZE << order);
+			break;
+		case MEMCG_KMEM_CHARGE_ERR:
+			objcg_new = obj_cgroup_from_current();
+			obj_cgroup_uncharge_pages(objcg_new, 1 << order);
+			obj_cgroup_put(objcg_new);
+			break;
+		}
+		return true;
+	}
+
+	slab = folio_slab(folio);
+	if (!slab)
+		return true;
+
+	s = slab->slab_cache;
+	if (!(s->flags & SLAB_ACCOUNT))
+		return true;
+
+	objcgs = slab_objcgs(slab);
+	if (!objcgs)
+		return true;
+	off = obj_to_index(s, slab, object);
+	objcg_old = objcgs[off];
+	/* In the MEMCG_KMEM_UNCHARGE step, the objcg is set to NULL. */
+	if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+		return true;
+
+	/*
+	 *  The recharge can be separated into three steps,
+	 *  1. Pre charge to the new memcg
+	 *  2. Uncharge from the old memcg
+	 *  3. Charge to the new memcg
+	 */
+	switch (step) {
+	case MEMCG_KMEM_PRE_CHARGE:
+		/*
+		 * Before uncharging from the old memcg, we must pre charge the
+		 * new memcg first, to make sure the recharge to the new memcg
+		 * always succeeds after the uncharge from the old memcg.
+		 */
+		objcg_new = get_obj_cgroup_from_current();
+		WARN_ON(!objcg_new);
+		if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+			obj_cgroup_put(objcg_new);
+			return false;
+		}
+		break;
+	case MEMCG_KMEM_UNCHARGE:
+		/* Uncharge from old memcg */
+		obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+		objcgs[off] = NULL;
+		mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg_old);
+		break;
+	case MEMCG_KMEM_POST_CHARGE:
+		/* Charge to the new memcg */
+		objcg_new = obj_cgroup_from_current();
+		objcgs[off] = objcg_new;
+		mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+		break;
+	case MEMCG_KMEM_CHARGE_ERR:
+		objcg_new = obj_cgroup_from_current();
+		obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+		obj_cgroup_put(objcg_new);
+		break;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 #define SHRINK_PROMOTE_MAX 32
 
 /*