diff mbox series

[v2,2/7] mm, slab: unlink slabinfo, sysfs and debugfs immediately

Message ID 20240807-b4-slab-kfree_rcu-destroy-v2-2-ea79102f428c@suse.cz (mailing list archive)
State Accepted
Commit 4ec10268ed98a3d568a39861e7b7d0a0fa7cbe60
Headers show
Series mm, slub: handle pending kfree_rcu() in kmem_cache_destroy() | expand

Commit Message

Vlastimil Babka Aug. 7, 2024, 10:31 a.m. UTC
kmem_cache_destroy() includes removing the associated sysfs and debugfs
directories, and the cache from the list of caches that appears in
/proc/slabinfo. Currently this might not happen immediately when:

- the cache is SLAB_TYPESAFE_BY_RCU and the cleanup is delayed,
  including the directories removal
- __kmem_cache_shutdown() fails due to outstanding objects - the
  directories remain indefinitely

When a cache is recreated with the same name, such as due to module
unload followed by a load, the directories will fail to be recreated for
the new instance of the cache due to the old directories being present.
The cache will also appear twice in /proc/slabinfo.

While we want to convert the SLAB_TYPESAFE_BY_RCU cleanup to be
synchronous again, the second point remains. So let's fix this first and
have the directories and slabinfo removed immediately in
kmem_cache_destroy() and regardless of __kmem_cache_shutdown() success.

This should not make debugging harder if __kmem_cache_shutdown() fails,
because a detailed report of outstanding objects is printed into dmesg
already due to the failure.

Also simplify kmem_cache_release() sysfs handling by using
__is_defined(SLAB_SUPPORTS_SYSFS).

Note the resulting code in kmem_cache_destroy() is a bit ugly but will
be further simplified - this is in order to make small bisectable steps.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slab_common.c | 57 ++++++++++++++++++++++++++------------------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

Comments

Jann Horn Aug. 7, 2024, 7:11 p.m. UTC | #1
On Wed, Aug 7, 2024 at 12:31 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> kmem_cache_destroy() includes removing the associated sysfs and debugfs
> directories, and the cache from the list of caches that appears in
> /proc/slabinfo. Currently this might not happen immediately when:
>
> - the cache is SLAB_TYPESAFE_BY_RCU and the cleanup is delayed,
>   including the directores removal
> - __kmem_cache_shutdown() fails due to outstanding objects - the
>   directories remain indefinitely
>
> When a cache is recreated with the same name, such as due to module
> unload followed by a load, the directories will fail to be recreated for
> the new instance of the cache due to the old directories being present.
> The cache will also appear twice in /proc/slabinfo.
>
> While we want to convert the SLAB_TYPESAFE_BY_RCU cleanup to be
> synchronous again, the second point remains. So let's fix this first and
> have the directories and slabinfo removed immediately in
> kmem_cache_destroy() and regardless of __kmem_cache_shutdown() success.
>
> This should not make debugging harder if __kmem_cache_shutdown() fails,
> because a detailed report of outstanding objects is printed into dmesg
> already due to the failure.

Reading this sentence made me curious what __kmem_cache_shutdown()
actually does - and I think technically, it prints a report of only
the outstanding objects *on the first NUMA node with outstanding
objects*? __kmem_cache_shutdown() bails immediately after seeing one
node with outstanding objects.

That's not really relevant to this series though, just a random observation.

> Also simplify kmem_cache_release() sysfs handling by using
> __is_defined(SLAB_SUPPORTS_SYSFS).
>
> Note the resulting code in kmem_cache_destroy() is a bit ugly but will
> be further simplified - this is in order to make small bisectable steps.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Reviewed-by: Jann Horn <jannh@google.com>
diff mbox series

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index b76d65d7fe33..db61df3b4282 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -484,31 +484,19 @@  kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
 }
 EXPORT_SYMBOL(kmem_buckets_create);
 
-#ifdef SLAB_SUPPORTS_SYSFS
 /*
  * For a given kmem_cache, kmem_cache_destroy() should only be called
  * once or there will be a use-after-free problem. The actual deletion
  * and release of the kobject does not need slab_mutex or cpu_hotplug_lock
  * protection. So they are now done without holding those locks.
- *
- * Note that there will be a slight delay in the deletion of sysfs files
- * if kmem_cache_release() is called indrectly from a work function.
  */
 static void kmem_cache_release(struct kmem_cache *s)
 {
-	if (slab_state >= FULL) {
-		sysfs_slab_unlink(s);
+	if (__is_defined(SLAB_SUPPORTS_SYSFS) && slab_state >= FULL)
 		sysfs_slab_release(s);
-	} else {
+	else
 		slab_kmem_cache_release(s);
-	}
 }
-#else
-static void kmem_cache_release(struct kmem_cache *s)
-{
-	slab_kmem_cache_release(s);
-}
-#endif
 
 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 {
@@ -534,7 +522,6 @@  static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	rcu_barrier();
 
 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
-		debugfs_slab_release(s);
 		kfence_shutdown_cache(s);
 		kmem_cache_release(s);
 	}
@@ -549,8 +536,8 @@  void slab_kmem_cache_release(struct kmem_cache *s)
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-	int err = -EBUSY;
 	bool rcu_set;
+	int err;
 
 	if (unlikely(!s) || !kasan_check_byte(s))
 		return;
@@ -558,11 +545,14 @@  void kmem_cache_destroy(struct kmem_cache *s)
 	cpus_read_lock();
 	mutex_lock(&slab_mutex);
 
-	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
-
 	s->refcount--;
-	if (s->refcount)
-		goto out_unlock;
+	if (s->refcount) {
+		mutex_unlock(&slab_mutex);
+		cpus_read_unlock();
+		return;
+	}
+
+	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
 
 	/* free asan quarantined objects */
 	kasan_cache_shutdown(s);
@@ -571,24 +561,29 @@  void kmem_cache_destroy(struct kmem_cache *s)
 	WARN(err, "%s %s: Slab cache still has objects when called from %pS",
 	     __func__, s->name, (void *)_RET_IP_);
 
-	if (err)
-		goto out_unlock;
-
 	list_del(&s->list);
 
-	if (rcu_set) {
-		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
-		schedule_work(&slab_caches_to_rcu_destroy_work);
-	} else {
+	if (!err && !rcu_set)
 		kfence_shutdown_cache(s);
-		debugfs_slab_release(s);
-	}
 
-out_unlock:
 	mutex_unlock(&slab_mutex);
 	cpus_read_unlock();
-	if (!err && !rcu_set)
+
+	if (slab_state >= FULL)
+		sysfs_slab_unlink(s);
+	debugfs_slab_release(s);
+
+	if (err)
+		return;
+
+	if (rcu_set) {
+		mutex_lock(&slab_mutex);
+		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
+		schedule_work(&slab_caches_to_rcu_destroy_work);
+		mutex_unlock(&slab_mutex);
+	} else {
 		kmem_cache_release(s);
+	}
 }
 EXPORT_SYMBOL(kmem_cache_destroy);