diff mbox series

[1/3] mm/slub: Fix slabs_node return value when CONFIG_SLUB_DEBUG disabled

Message ID 20200614123923.99189-2-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series mm/slub: Fix slabs_node return value | expand

Commit Message

Muchun Song June 14, 2020, 12:39 p.m. UTC
The slabs_node() always returns zero when CONFIG_SLUB_DEBUG is disabled.
But some code determines whether a slab is empty by checking the return
value of slabs_node(). As you know, the result is not correct. This
problem can be reproduced by the following code (and by booting the
system with the cmdline of "slub_nomerge"):

  void *objs[32];
  struct kmem_cache *cache = kmem_cache_create("kmem-test", 128, 0,
			0, 0);

  if (cache) {
  	int i;

	/* Make a full slab */
  	for (i = 0; i < ARRAY_SIZE(objs); i++)
		objs[i] = kmem_cache_alloc(cache, GFP_KERNEL_ACCOUNT);

	/*
  	 * This really should fail because the slab cache still has
  	 * objects. But we did destroy the @cache because of zero
  	 * returned by slabs_node().
  	 */
  	kmem_cache_destroy(cache);
  }

To fix it, we can move the nr_slabs of kmem_cache_node out of the
CONFIG_SLUB_DEBUG. So we can get the correct value returned by
slabs_node().

With this patch applied, we will get a warning message and stack
trace in the dmesg.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/slab.h |  2 +-
 mm/slub.c | 80 +++++++++++++++++++++++++++++++++------------------------------
 2 files changed, 43 insertions(+), 39 deletions(-)

Comments

Joonsoo Kim June 15, 2020, 6:11 a.m. UTC | #1
2020년 6월 14일 (일) 오후 9:39, Muchun Song <songmuchun@bytedance.com>님이 작성:
>
> The slabs_node() always return zero when CONFIG_SLUB_DEBUG is disabled.
> But some codes determine whether slab is empty by checking the return
> value of slabs_node(). As you know, the result is not correct. This
> problem can be reproduce by the follow code(and boot system with the
> cmdline of "slub_nomerge"):
>
>   void *objs[32];
>   struct kmem_cache *cache = kmem_cache_create("kmem-test", 128, 0,
>                         0, 0);
>
>   if (cache) {
>         int i;
>
>         /* Make a full slab */
>         for (i = 0; i < ARRAY_SIZE(objs); i++)
>                 objs[i] = kmem_cache_alloc(cache, GFP_KERNEL_ACCOUNT);
>
>         /*
>          * This really should fail because the slab cache still has
>          * objects. But we did destroy the @cache because of zero
>          * returned by slabs_node().
>          */
>         kmem_cache_destroy(cache);
>   }
>
> To fix it, we can move the nr_slabs of kmem_cache_node out of the
> CONFIG_SLUB_DEBUG. So we can get the corrent value returned by the
> slabs_node().
>
> With this patch applied, we will get a warning message and stack
> trace in the dmesg.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  mm/slab.h |  2 +-
>  mm/slub.c | 80 +++++++++++++++++++++++++++++++++------------------------------
>  2 files changed, 43 insertions(+), 39 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index 0b91f2a7b033..062d4542b7e2 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -619,8 +619,8 @@ struct kmem_cache_node {
>  #ifdef CONFIG_SLUB
>         unsigned long nr_partial;
>         struct list_head partial;
> -#ifdef CONFIG_SLUB_DEBUG
>         atomic_long_t nr_slabs;
> +#ifdef CONFIG_SLUB_DEBUG
>         atomic_long_t total_objects;
>         struct list_head full;
>  #endif
> diff --git a/mm/slub.c b/mm/slub.c
> index 49b5cb7da318..1a3e6a5b7287 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c

Hello,

You also need to initialize nr_slabs in init_kmem_cache_node()
on !CONFIG_SLUB_DEBUG.

Otherwise, looks good to me.

Thanks.
Muchun Song June 15, 2020, 6:26 a.m. UTC | #2
On Mon, Jun 15, 2020 at 2:11 PM Joonsoo Kim <js1304@gmail.com> wrote:
>
> 2020년 6월 14일 (일) 오후 9:39, Muchun Song <songmuchun@bytedance.com>님이 작성:
> >
> > The slabs_node() always return zero when CONFIG_SLUB_DEBUG is disabled.
> > But some codes determine whether slab is empty by checking the return
> > value of slabs_node(). As you know, the result is not correct. This
> > problem can be reproduce by the follow code(and boot system with the
> > cmdline of "slub_nomerge"):
> >
> >   void *objs[32];
> >   struct kmem_cache *cache = kmem_cache_create("kmem-test", 128, 0,
> >                         0, 0);
> >
> >   if (cache) {
> >         int i;
> >
> >         /* Make a full slab */
> >         for (i = 0; i < ARRAY_SIZE(objs); i++)
> >                 objs[i] = kmem_cache_alloc(cache, GFP_KERNEL_ACCOUNT);
> >
> >         /*
> >          * This really should fail because the slab cache still has
> >          * objects. But we did destroy the @cache because of zero
> >          * returned by slabs_node().
> >          */
> >         kmem_cache_destroy(cache);
> >   }
> >
> > To fix it, we can move the nr_slabs of kmem_cache_node out of the
> > CONFIG_SLUB_DEBUG. So we can get the corrent value returned by the
> > slabs_node().
> >
> > With this patch applied, we will get a warning message and stack
> > trace in the dmesg.
> >
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
> >  mm/slab.h |  2 +-
> >  mm/slub.c | 80 +++++++++++++++++++++++++++++++++------------------------------
> >  2 files changed, 43 insertions(+), 39 deletions(-)
> >
> > diff --git a/mm/slab.h b/mm/slab.h
> > index 0b91f2a7b033..062d4542b7e2 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -619,8 +619,8 @@ struct kmem_cache_node {
> >  #ifdef CONFIG_SLUB
> >         unsigned long nr_partial;
> >         struct list_head partial;
> > -#ifdef CONFIG_SLUB_DEBUG
> >         atomic_long_t nr_slabs;
> > +#ifdef CONFIG_SLUB_DEBUG
> >         atomic_long_t total_objects;
> >         struct list_head full;
> >  #endif
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 49b5cb7da318..1a3e6a5b7287 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
>
> Hello,
>
> You also need to initialize nr_slabs in init_kmem_cache_node()
> on !CONFIG_SLUB_DEBUG.

Good catch, thanks! I will fix it in the next version.

>
> Otherwise, looks good to me.
>
> Thanks.
Vlastimil Babka June 15, 2020, 4:46 p.m. UTC | #3
On 6/14/20 2:39 PM, Muchun Song wrote:
> The slabs_node() always return zero when CONFIG_SLUB_DEBUG is disabled.
> But some codes determine whether slab is empty by checking the return
> value of slabs_node(). As you know, the result is not correct. This
> problem can be reproduce by the follow code(and boot system with the
> cmdline of "slub_nomerge"):
> 
>   void *objs[32];
>   struct kmem_cache *cache = kmem_cache_create("kmem-test", 128, 0,
> 			0, 0);
> 
>   if (cache) {
>   	int i;
> 
> 	/* Make a full slab */
>   	for (i = 0; i < ARRAY_SIZE(objs); i++)
> 		objs[i] = kmem_cache_alloc(cache, GFP_KERNEL_ACCOUNT);
> 
> 	/*
>   	 * This really should fail because the slab cache still has
>   	 * objects. But we did destroy the @cache because of zero
>   	 * returned by slabs_node().
>   	 */
>   	kmem_cache_destroy(cache);
>   }

Hmm, the whole SLUB subsystem (or even kernel) has some trust in the other
individual subsystems doing the right thing. I.e. if allocate an object once and
free it twice, bad things will happen. If you are debugging such a problem, you
enable SLUB_DEBUG (and the boot parameter).

This particular situation is the same - SLUB trusts its users to free their
objects before calling kmem_cache_destroy(). If they don't and just leak them,
then they stay leaked and nothing worse will happen (I'd expect). If they
destroy the cache and later try to e.g. free the objects, then there will most
likely be a crash.

And yeah, if you are debugging such thing, you will compile with SLUB_DEBUG and
make sure there's slub_nomerge (or you won't catch it anyway).

Hmm, but there's one exception in __kmem_cache_shrink() where the caller wants
to know if all slabs were discarded. kmem_cache_shrink() passes that to its
callers, but seems nobody cares about the return value.

The only caller that seems to care is __kmemcg_cache_deactivate_after_rcu():

        if (!__kmem_cache_shrink(s))
                sysfs_slab_remove(s);

Perhaps that will go away with memcg rework? (CC Roman). If yes, we also have
the option to stop returning the value in kmem_cache_shrink() (as nobody seems
to care), and then slabs_node() remains a debug only thing.

> To fix it, we can move the nr_slabs of kmem_cache_node out of the
> CONFIG_SLUB_DEBUG. So we can get the corrent value returned by the
> slabs_node().
> 
> With this patch applied, we will get a warning message and stack
> trace in the dmesg.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  mm/slab.h |  2 +-
>  mm/slub.c | 80 +++++++++++++++++++++++++++++++++------------------------------
>  2 files changed, 43 insertions(+), 39 deletions(-)
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 0b91f2a7b033..062d4542b7e2 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -619,8 +619,8 @@ struct kmem_cache_node {
>  #ifdef CONFIG_SLUB
>  	unsigned long nr_partial;
>  	struct list_head partial;
> -#ifdef CONFIG_SLUB_DEBUG
>  	atomic_long_t nr_slabs;
> +#ifdef CONFIG_SLUB_DEBUG
>  	atomic_long_t total_objects;
>  	struct list_head full;
>  #endif
> diff --git a/mm/slub.c b/mm/slub.c
> index 49b5cb7da318..1a3e6a5b7287 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1070,39 +1070,14 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
>  	list_del(&page->slab_list);
>  }
>  
> -/* Tracking of the number of slabs for debugging purposes */
> -static inline unsigned long slabs_node(struct kmem_cache *s, int node)
> +/* Tracking of the number of objects for debugging purposes */
> +static inline void inc_objects_node(struct kmem_cache_node *n, int objects)
>  {
> -	struct kmem_cache_node *n = get_node(s, node);
> -
> -	return atomic_long_read(&n->nr_slabs);
> +	atomic_long_add(objects, &n->total_objects);
>  }
>  
> -static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
> +static inline void dec_objects_node(struct kmem_cache_node *n, int objects)
>  {
> -	return atomic_long_read(&n->nr_slabs);
> -}
> -
> -static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
> -{
> -	struct kmem_cache_node *n = get_node(s, node);
> -
> -	/*
> -	 * May be called early in order to allocate a slab for the
> -	 * kmem_cache_node structure. Solve the chicken-egg
> -	 * dilemma by deferring the increment of the count during
> -	 * bootstrap (see early_kmem_cache_node_alloc).
> -	 */
> -	if (likely(n)) {
> -		atomic_long_inc(&n->nr_slabs);
> -		atomic_long_add(objects, &n->total_objects);
> -	}
> -}
> -static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
> -{
> -	struct kmem_cache_node *n = get_node(s, node);
> -
> -	atomic_long_dec(&n->nr_slabs);
>  	atomic_long_sub(objects, &n->total_objects);
>  }
>  
> @@ -1413,15 +1388,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
>  
>  #define disable_higher_order_debug 0
>  
> -static inline unsigned long slabs_node(struct kmem_cache *s, int node)
> -							{ return 0; }
> -static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
> -							{ return 0; }
> -static inline void inc_slabs_node(struct kmem_cache *s, int node,
> -							int objects) {}
> -static inline void dec_slabs_node(struct kmem_cache *s, int node,
> -							int objects) {}
> -
> +static inline void inc_objects_node(struct kmem_cache_node *n, int objects) {}
> +static inline void dec_objects_node(struct kmem_cache_node *n, int objects) {}
>  static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
>  			       void *freelist, void *nextfree)
>  {
> @@ -1429,6 +1397,42 @@ static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
>  }
>  #endif /* CONFIG_SLUB_DEBUG */
>  
> +static inline unsigned long slabs_node(struct kmem_cache *s, int node)
> +{
> +	struct kmem_cache_node *n = get_node(s, node);
> +
> +	return atomic_long_read(&n->nr_slabs);
> +}
> +
> +static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
> +{
> +	return atomic_long_read(&n->nr_slabs);
> +}
> +
> +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
> +{
> +	struct kmem_cache_node *n = get_node(s, node);
> +
> +	/*
> +	 * May be called early in order to allocate a slab for the
> +	 * kmem_cache_node structure. Solve the chicken-egg
> +	 * dilemma by deferring the increment of the count during
> +	 * bootstrap (see early_kmem_cache_node_alloc).
> +	 */
> +	if (likely(n)) {
> +		atomic_long_inc(&n->nr_slabs);
> +		inc_objects_node(n, objects);
> +	}
> +}
> +
> +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
> +{
> +	struct kmem_cache_node *n = get_node(s, node);
> +
> +	atomic_long_dec(&n->nr_slabs);
> +	dec_objects_node(n, objects);
> +}
> +
>  /*
>   * Hooks for other subsystems that check memory allocations. In a typical
>   * production configuration these hooks all should produce no code at all.
>
diff mbox series

Patch

diff --git a/mm/slab.h b/mm/slab.h
index 0b91f2a7b033..062d4542b7e2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -619,8 +619,8 @@  struct kmem_cache_node {
 #ifdef CONFIG_SLUB
 	unsigned long nr_partial;
 	struct list_head partial;
-#ifdef CONFIG_SLUB_DEBUG
 	atomic_long_t nr_slabs;
+#ifdef CONFIG_SLUB_DEBUG
 	atomic_long_t total_objects;
 	struct list_head full;
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 49b5cb7da318..1a3e6a5b7287 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1070,39 +1070,14 @@  static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
 	list_del(&page->slab_list);
 }
 
-/* Tracking of the number of slabs for debugging purposes */
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+/* Tracking of the number of objects for debugging purposes */
+static inline void inc_objects_node(struct kmem_cache_node *n, int objects)
 {
-	struct kmem_cache_node *n = get_node(s, node);
-
-	return atomic_long_read(&n->nr_slabs);
+	atomic_long_add(objects, &n->total_objects);
 }
 
-static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+static inline void dec_objects_node(struct kmem_cache_node *n, int objects)
 {
-	return atomic_long_read(&n->nr_slabs);
-}
-
-static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
-{
-	struct kmem_cache_node *n = get_node(s, node);
-
-	/*
-	 * May be called early in order to allocate a slab for the
-	 * kmem_cache_node structure. Solve the chicken-egg
-	 * dilemma by deferring the increment of the count during
-	 * bootstrap (see early_kmem_cache_node_alloc).
-	 */
-	if (likely(n)) {
-		atomic_long_inc(&n->nr_slabs);
-		atomic_long_add(objects, &n->total_objects);
-	}
-}
-static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
-{
-	struct kmem_cache_node *n = get_node(s, node);
-
-	atomic_long_dec(&n->nr_slabs);
 	atomic_long_sub(objects, &n->total_objects);
 }
 
@@ -1413,15 +1388,8 @@  slab_flags_t kmem_cache_flags(unsigned int object_size,
 
 #define disable_higher_order_debug 0
 
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
-							{ return 0; }
-static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
-							{ return 0; }
-static inline void inc_slabs_node(struct kmem_cache *s, int node,
-							int objects) {}
-static inline void dec_slabs_node(struct kmem_cache *s, int node,
-							int objects) {}
-
+static inline void inc_objects_node(struct kmem_cache_node *n, int objects) {}
+static inline void dec_objects_node(struct kmem_cache_node *n, int objects) {}
 static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
 			       void *freelist, void *nextfree)
 {
@@ -1429,6 +1397,42 @@  static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
 }
 #endif /* CONFIG_SLUB_DEBUG */
 
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	return atomic_long_read(&n->nr_slabs);
+}
+
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+{
+	return atomic_long_read(&n->nr_slabs);
+}
+
+static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	/*
+	 * May be called early in order to allocate a slab for the
+	 * kmem_cache_node structure. Solve the chicken-egg
+	 * dilemma by deferring the increment of the count during
+	 * bootstrap (see early_kmem_cache_node_alloc).
+	 */
+	if (likely(n)) {
+		atomic_long_inc(&n->nr_slabs);
+		inc_objects_node(n, objects);
+	}
+}
+
+static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	atomic_long_dec(&n->nr_slabs);
+	dec_objects_node(n, objects);
+}
+
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.