diff mbox series

[v2,4/7] mm, slab: reintroduce rcu_barrier() into kmem_cache_destroy()

Message ID 20240807-b4-slab-kfree_rcu-destroy-v2-4-ea79102f428c@suse.cz (mailing list archive)
State Accepted
Commit 2eb14c1c2717396f2fb1e4a4c5a1ec87cdd174f6
Headers show
Series mm, slub: handle pending kfree_rcu() in kmem_cache_destroy() | expand

Commit Message

Vlastimil Babka Aug. 7, 2024, 10:31 a.m. UTC
There used to be an rcu_barrier() for SLAB_TYPESAFE_BY_RCU caches in
kmem_cache_destroy() until commit 657dc2f97220 ("slab: remove
synchronous rcu_barrier() call in memcg cache release path") moved it to
an asynchronous work item that finishes the destruction of such caches.

The motivation for that commit was the MEMCG_KMEM integration that at
the time created and removed clones of the global slab caches together
with their cgroups, and blocking cgroups removal was unwelcome. The
implementation later changed to per-object memcg tracking using a single
cache, so there should be no more need for a fast non-blocking
kmem_cache_destroy(), which is typically only done when a module is
unloaded etc.

Going back to a synchronous barrier has the following advantages:

- simpler implementation
- it's easier to test the result of kmem_cache_destroy() in a kunit test

Thus effectively revert commit 657dc2f97220. It is not a 1:1 revert as
the code has changed since. The main part is that kmem_cache_release(s)
is always called from kmem_cache_destroy(), but for SLAB_TYPESAFE_BY_RCU
caches an rcu_barrier() is done first.

Suggested-by: Mateusz Guzik <mjguzik@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slab_common.c | 47 ++++-------------------------------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

Comments

Jann Horn Aug. 7, 2024, 7:11 p.m. UTC | #1
On Wed, Aug 7, 2024 at 12:31 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> There used to be a rcu_barrier() for SLAB_TYPESAFE_BY_RCU caches in
> kmem_cache_destroy() until commit 657dc2f97220 ("slab: remove
> synchronous rcu_barrier() call in memcg cache release path") moved it to
> an asynchronous work that finishes the destroying of such caches.
>
> The motivation for that commit was the MEMCG_KMEM integration that at
> the time created and removed clones of the global slab caches together
> with their cgroups, and blocking cgroups removal was unwelcome. The
> implementation later changed to per-object memcg tracking using a single
> cache, so there should be no more need for a fast non-blocking
> kmem_cache_destroy(), which is typically only done when a module is
> unloaded etc.
>
> Going back to synchronous barrier has the following advantages:
>
> - simpler implementation
> - it's easier to test the result of kmem_cache_destroy() in a kunit test
>
> Thus effectively revert commit 657dc2f97220. It is not a 1:1 revert as
> the code has changed since. The main part is that kmem_cache_release(s)
> is always called from kmem_cache_destroy(), but for SLAB_TYPESAFE_BY_RCU
> caches there's a rcu_barrier() first.
>
> Suggested-by: Mateusz Guzik <mjguzik@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Reviewed-by: Jann Horn <jannh@google.com>
diff mbox series

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index a079b8540334..c40227d5fa07 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -40,11 +40,6 @@  LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
-static LIST_HEAD(slab_caches_to_rcu_destroy);
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
-static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
-		    slab_caches_to_rcu_destroy_workfn);
-
 /*
  * Set of flags that will prevent slab merging
  */
@@ -499,33 +494,6 @@  static void kmem_cache_release(struct kmem_cache *s)
 		slab_kmem_cache_release(s);
 }
 
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
-{
-	LIST_HEAD(to_destroy);
-	struct kmem_cache *s, *s2;
-
-	/*
-	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
-	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
-	 * through RCU and the associated kmem_cache are dereferenced
-	 * while freeing the pages, so the kmem_caches should be freed only
-	 * after the pending RCU operations are finished.  As rcu_barrier()
-	 * is a pretty slow operation, we batch all pending destructions
-	 * asynchronously.
-	 */
-	mutex_lock(&slab_mutex);
-	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
-	mutex_unlock(&slab_mutex);
-
-	if (list_empty(&to_destroy))
-		return;
-
-	rcu_barrier();
-
-	list_for_each_entry_safe(s, s2, &to_destroy, list)
-		kmem_cache_release(s);
-}
-
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
 	__kmem_cache_release(s);
@@ -535,7 +503,6 @@  void slab_kmem_cache_release(struct kmem_cache *s)
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-	bool rcu_set;
 	int err;
 
 	if (unlikely(!s) || !kasan_check_byte(s))
@@ -551,8 +518,6 @@  void kmem_cache_destroy(struct kmem_cache *s)
 		return;
 	}
 
-	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
-
 	/* free asan quarantined objects */
 	kasan_cache_shutdown(s);
 
@@ -572,14 +537,10 @@  void kmem_cache_destroy(struct kmem_cache *s)
 	if (err)
 		return;
 
-	if (rcu_set) {
-		mutex_lock(&slab_mutex);
-		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
-		schedule_work(&slab_caches_to_rcu_destroy_work);
-		mutex_unlock(&slab_mutex);
-	} else {
-		kmem_cache_release(s);
-	}
+	if (s->flags & SLAB_TYPESAFE_BY_RCU)
+		rcu_barrier();
+
+	kmem_cache_release(s);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);