diff mbox

[v3,2/7] mm, slab/slub: introduce kmalloc-reclaimable caches

Message ID 20180718133620.6205-3-vbabka@suse.cz (mailing list archive)
State New, archived
Headers show

Commit Message

Vlastimil Babka July 18, 2018, 1:36 p.m. UTC
Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which indicates
they contain objects which can be reclaimed under memory pressure (typically
through a shrinker). This makes the slab pages accounted as NR_SLAB_RECLAIMABLE
in vmstat, which is reflected also the MemAvailable meminfo counter and in
overcommit decisions. The slab pages are also allocated with __GFP_RECLAIMABLE,
which is good for anti-fragmentation through grouping pages by mobility.

The generic kmalloc-X caches are created without this flag, but sometimes are
used also for objects that can be reclaimed, which due to varying size cannot
have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag. A prominent example
are dcache external names, which prompted the creation of a new, manually
managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES in commit f1782c9bc547
("dcache: account external names as indirectly reclaimable memory").

To better handle this and any other similar cases, this patch introduces
SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X.
They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among gfp
flags. They are added to the kmalloc_caches array as a new type. Allocations
with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type cache.

This change only applies to SLAB and SLUB, not SLOB. This is fine, since SLOB's
target are tiny system and this patch does add some overhead of kmem management
objects.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 16 +++++++++++----
 mm/slab_common.c     | 48 ++++++++++++++++++++++++++++----------------
 2 files changed, 43 insertions(+), 21 deletions(-)

Comments

Mel Gorman July 19, 2018, 8:23 a.m. UTC | #1
On Wed, Jul 18, 2018 at 03:36:15PM +0200, Vlastimil Babka wrote:
> Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which indicates
> they contain objects which can be reclaimed under memory pressure (typically
> through a shrinker). This makes the slab pages accounted as NR_SLAB_RECLAIMABLE
> in vmstat, which is reflected also the MemAvailable meminfo counter and in
> overcommit decisions. The slab pages are also allocated with __GFP_RECLAIMABLE,
> which is good for anti-fragmentation through grouping pages by mobility.
> 
> The generic kmalloc-X caches are created without this flag, but sometimes are
> used also for objects that can be reclaimed, which due to varying size cannot
> have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag. A prominent example
> are dcache external names, which prompted the creation of a new, manually
> managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES in commit f1782c9bc547
> ("dcache: account external names as indirectly reclaimable memory").
> 
> To better handle this and any other similar cases, this patch introduces
> SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X.
> They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among gfp
> flags. They are added to the kmalloc_caches array as a new type. Allocations
> with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type cache.
> 
> This change only applies to SLAB and SLUB, not SLOB. This is fine, since SLOB's
> target are tiny system and this patch does add some overhead of kmem management
> objects.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>
> <SNIP>
>
> @@ -309,12 +310,19 @@ extern struct kmem_cache *kmalloc_caches[KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>  static __always_inline unsigned int kmalloc_type(gfp_t flags)
>  {
>  	int is_dma = 0;
> +	int is_reclaimable;
>  
>  #ifdef CONFIG_ZONE_DMA
>  	is_dma = !!(flags & __GFP_DMA);
>  #endif
>  
> -	return is_dma;
> +	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
> +
> +	/*
> +	 * If an allocation is botth __GFP_DMA and __GFP_RECLAIMABLE, return
> +	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
> +	 */
> +	return (is_dma * 2) + (is_reclaimable & !is_dma);
>  }
>  

s/botth/both/



>  /*
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 4614248ca381..614fb7ab8312 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1107,10 +1107,21 @@ void __init setup_kmalloc_cache_index_table(void)
>  	}
>  }
>  
> -static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
> +static void __init
> +new_kmalloc_cache(int idx, int type, slab_flags_t flags)
>  {
> -	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
> -					kmalloc_info[idx].name,
> +	const char *name;
> +
> +	if (type == KMALLOC_RECLAIM) {
> +		flags |= SLAB_RECLAIM_ACCOUNT;
> +		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
> +						kmalloc_info[idx].size);
> +		BUG_ON(!name);
> +	} else {
> +		name = kmalloc_info[idx].name;
> +	}
> +
> +	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
>  					kmalloc_info[idx].size, flags, 0,
>  					kmalloc_info[idx].size);
>  }

I was going to query that BUG_ON but if I'm reading it right, we just
have to be careful in the future that the "normal" kmalloc cache is always
initialised before the reclaimable cache or there will be issues.

> @@ -1122,22 +1133,25 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
>   */
>  void __init create_kmalloc_caches(slab_flags_t flags)
>  {
> -	int i;
> -	int type = KMALLOC_NORMAL;
> +	int i, type;
>  
> -	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
> -		if (!kmalloc_caches[type][i])
> -			new_kmalloc_cache(i, flags);
> +	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
> +		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
> +			if (!kmalloc_caches[type][i])
> +				new_kmalloc_cache(i, type, flags);
>  

I don't see a problem here as such but the values of the KMALLOC_* types
is important both for this function and the kmalloc_type(). It might be
worth adding a warning that these functions be examined if updating the
types but then again, anyone trying and getting it wrong will have a
broken kernel so;

Acked-by: Mel Gorman <mgorman@techsingularity.net>
Roman Gushchin July 19, 2018, 6:16 p.m. UTC | #2
On Wed, Jul 18, 2018 at 03:36:15PM +0200, Vlastimil Babka wrote:
> Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which indicates
> they contain objects which can be reclaimed under memory pressure (typically
> through a shrinker). This makes the slab pages accounted as NR_SLAB_RECLAIMABLE
> in vmstat, which is reflected also the MemAvailable meminfo counter and in
> overcommit decisions. The slab pages are also allocated with __GFP_RECLAIMABLE,
> which is good for anti-fragmentation through grouping pages by mobility.
> 
> The generic kmalloc-X caches are created without this flag, but sometimes are
> used also for objects that can be reclaimed, which due to varying size cannot
> have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag. A prominent example
> are dcache external names, which prompted the creation of a new, manually
> managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES in commit f1782c9bc547
> ("dcache: account external names as indirectly reclaimable memory").
> 
> To better handle this and any other similar cases, this patch introduces
> SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X.
> They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among gfp
> flags. They are added to the kmalloc_caches array as a new type. Allocations
> with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type cache.
> 
> This change only applies to SLAB and SLUB, not SLOB. This is fine, since SLOB's
> target are tiny system and this patch does add some overhead of kmem management
> objects.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/slab.h | 16 +++++++++++----
>  mm/slab_common.c     | 48 ++++++++++++++++++++++++++++----------------
>  2 files changed, 43 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 4299c59353a1..d89e934e0d8b 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -296,11 +296,12 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
>                                 (KMALLOC_MIN_SIZE) : 16)
>  
>  #define KMALLOC_NORMAL	0
> +#define KMALLOC_RECLAIM	1
>  #ifdef CONFIG_ZONE_DMA
> -#define KMALLOC_DMA	1
> -#define KMALLOC_TYPES	2
> +#define KMALLOC_DMA	2
> +#define KMALLOC_TYPES	3
>  #else
> -#define KMALLOC_TYPES	1
> +#define KMALLOC_TYPES	2
>  #endif
>  
>  #ifndef CONFIG_SLOB
> @@ -309,12 +310,19 @@ extern struct kmem_cache *kmalloc_caches[KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>  static __always_inline unsigned int kmalloc_type(gfp_t flags)
>  {
>  	int is_dma = 0;
> +	int is_reclaimable;
>  
>  #ifdef CONFIG_ZONE_DMA
>  	is_dma = !!(flags & __GFP_DMA);
>  #endif
>  
> -	return is_dma;
> +	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
> +
> +	/*
> +	 * If an allocation is botth __GFP_DMA and __GFP_RECLAIMABLE, return
                                 ^^
			       typo
> +	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
> +	 */
> +	return (is_dma * 2) + (is_reclaimable & !is_dma);

Maybe
is_dma * KMALLOC_DMA + (is_reclaimable && !is_dma) * KMALLOC_RECLAIM
looks better?

>  }
>  
>  /*
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 4614248ca381..614fb7ab8312 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1107,10 +1107,21 @@ void __init setup_kmalloc_cache_index_table(void)
>  	}
>  }
>  
> -static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
> +static void __init
> +new_kmalloc_cache(int idx, int type, slab_flags_t flags)
>  {
> -	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
> -					kmalloc_info[idx].name,
> +	const char *name;
> +
> +	if (type == KMALLOC_RECLAIM) {
> +		flags |= SLAB_RECLAIM_ACCOUNT;
> +		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
> +						kmalloc_info[idx].size);
> +		BUG_ON(!name);

I'd replace this with WARN_ON() and falling back to kmalloc_info[idx].name.

> +	} else {
> +		name = kmalloc_info[idx].name;
> +	}
> +
> +	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
>  					kmalloc_info[idx].size, flags, 0,
>  					kmalloc_info[idx].size);
>  }
Vlastimil Babka July 20, 2018, 9:32 a.m. UTC | #3
On 07/19/2018 10:23 AM, Mel Gorman wrote:
>>  /*
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index 4614248ca381..614fb7ab8312 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1107,10 +1107,21 @@ void __init setup_kmalloc_cache_index_table(void)
>>  	}
>>  }
>>  
>> -static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
>> +static void __init
>> +new_kmalloc_cache(int idx, int type, slab_flags_t flags)
>>  {
>> -	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
>> -					kmalloc_info[idx].name,
>> +	const char *name;
>> +
>> +	if (type == KMALLOC_RECLAIM) {
>> +		flags |= SLAB_RECLAIM_ACCOUNT;
>> +		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
>> +						kmalloc_info[idx].size);
>> +		BUG_ON(!name);
>> +	} else {
>> +		name = kmalloc_info[idx].name;
>> +	}
>> +
>> +	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
>>  					kmalloc_info[idx].size, flags, 0,
>>  					kmalloc_info[idx].size);
>>  }
> 
> I was going to query that BUG_ON but if I'm reading it right, we just
> have to be careful in the future that the "normal" kmalloc cache is always
> initialised before the reclaimable cache or there will be issues.

Yeah, I was just copying how the dma-kmalloc code does it.

>> @@ -1122,22 +1133,25 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
>>   */
>>  void __init create_kmalloc_caches(slab_flags_t flags)
>>  {
>> -	int i;
>> -	int type = KMALLOC_NORMAL;
>> +	int i, type;
>>  
>> -	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
>> -		if (!kmalloc_caches[type][i])
>> -			new_kmalloc_cache(i, flags);
>> +	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
>> +		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
>> +			if (!kmalloc_caches[type][i])
>> +				new_kmalloc_cache(i, type, flags);
>>  
> 
> I don't see a problem here as such but the values of the KMALLOC_* types
> is important both for this function and the kmalloc_type(). It might be
> worth adding a warning that these functions be examined if updating the
> types but then again, anyone trying and getting it wrong will have a
> broken kernel so;

OK

> Acked-by: Mel Gorman <mgorman@techsingularity.net>

Thanks!
Vlastimil Babka July 20, 2018, 9:35 a.m. UTC | #4
On 07/19/2018 08:16 PM, Roman Gushchin wrote:
>>  	is_dma = !!(flags & __GFP_DMA);
>>  #endif
>>  
>> -	return is_dma;
>> +	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
>> +
>> +	/*
>> +	 * If an allocation is botth __GFP_DMA and __GFP_RECLAIMABLE, return
>                                  ^^
> 			       typo
>> +	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
>> +	 */
>> +	return (is_dma * 2) + (is_reclaimable & !is_dma);
> 
> Maybe
> is_dma * KMALLOC_DMA + (is_reclaimable && !is_dma) * KMALLOC_RECLAIM
> looks better?

I think I meant to do that but forgot, thanks.

>>  }
>>  
>>  /*
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index 4614248ca381..614fb7ab8312 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1107,10 +1107,21 @@ void __init setup_kmalloc_cache_index_table(void)
>>  	}
>>  }
>>  
>> -static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
>> +static void __init
>> +new_kmalloc_cache(int idx, int type, slab_flags_t flags)
>>  {
>> -	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
>> -					kmalloc_info[idx].name,
>> +	const char *name;
>> +
>> +	if (type == KMALLOC_RECLAIM) {
>> +		flags |= SLAB_RECLAIM_ACCOUNT;
>> +		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
>> +						kmalloc_info[idx].size);
>> +		BUG_ON(!name);
> 
> I'd replace this with WARN_ON() and falling back to kmalloc_info[idx].name.

It's basically a copy/paste of the dma-kmalloc code. If that triggers,
it means somebody was changing the code and introduced a wrong order (as
Mel said). A system that genuinely has no memory for that printf at this
point, would not get very far anyway...

>> +	} else {
>> +		name = kmalloc_info[idx].name;
>> +	}
>> +
>> +	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
>>  					kmalloc_info[idx].size, flags, 0,
>>  					kmalloc_info[idx].size);
>>  }
Christoph Lameter (Ampere) July 30, 2018, 3:41 p.m. UTC | #5
On Wed, 18 Jul 2018, Vlastimil Babka wrote:

> index 4299c59353a1..d89e934e0d8b 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -296,11 +296,12 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
>                                 (KMALLOC_MIN_SIZE) : 16)
>
>  #define KMALLOC_NORMAL	0
> +#define KMALLOC_RECLAIM	1
>  #ifdef CONFIG_ZONE_DMA
> -#define KMALLOC_DMA	1
> -#define KMALLOC_TYPES	2
> +#define KMALLOC_DMA	2
> +#define KMALLOC_TYPES	3
>  #else
> -#define KMALLOC_TYPES	1
> +#define KMALLOC_TYPES	2
>  #endif

I like enums....

Acked-by: Christoph Lameter <cl@linux.com>
diff mbox

Patch

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4299c59353a1..d89e934e0d8b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -296,11 +296,12 @@  static inline void __check_heap_object(const void *ptr, unsigned long n,
                                (KMALLOC_MIN_SIZE) : 16)
 
 #define KMALLOC_NORMAL	0
+#define KMALLOC_RECLAIM	1
 #ifdef CONFIG_ZONE_DMA
-#define KMALLOC_DMA	1
-#define KMALLOC_TYPES	2
+#define KMALLOC_DMA	2
+#define KMALLOC_TYPES	3
 #else
-#define KMALLOC_TYPES	1
+#define KMALLOC_TYPES	2
 #endif
 
 #ifndef CONFIG_SLOB
@@ -309,12 +310,19 @@  extern struct kmem_cache *kmalloc_caches[KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 static __always_inline unsigned int kmalloc_type(gfp_t flags)
 {
 	int is_dma = 0;
+	int is_reclaimable;
 
 #ifdef CONFIG_ZONE_DMA
 	is_dma = !!(flags & __GFP_DMA);
 #endif
 
-	return is_dma;
+	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
+
+	/*
+	 * If an allocation is botth __GFP_DMA and __GFP_RECLAIMABLE, return
+	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
+	 */
+	return (is_dma * 2) + (is_reclaimable & !is_dma);
 }
 
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 4614248ca381..614fb7ab8312 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1107,10 +1107,21 @@  void __init setup_kmalloc_cache_index_table(void)
 	}
 }
 
-static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, int type, slab_flags_t flags)
 {
-	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
-					kmalloc_info[idx].name,
+	const char *name;
+
+	if (type == KMALLOC_RECLAIM) {
+		flags |= SLAB_RECLAIM_ACCOUNT;
+		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
+						kmalloc_info[idx].size);
+		BUG_ON(!name);
+	} else {
+		name = kmalloc_info[idx].name;
+	}
+
+	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
 					kmalloc_info[idx].size, flags, 0,
 					kmalloc_info[idx].size);
 }
@@ -1122,22 +1133,25 @@  static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
  */
 void __init create_kmalloc_caches(slab_flags_t flags)
 {
-	int i;
-	int type = KMALLOC_NORMAL;
+	int i, type;
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (!kmalloc_caches[type][i])
-			new_kmalloc_cache(i, flags);
+	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
+		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
+			if (!kmalloc_caches[type][i])
+				new_kmalloc_cache(i, type, flags);
 
-		/*
-		 * Caches that are not of the two-to-the-power-of size.
-		 * These have to be created immediately after the
-		 * earlier power of two caches
-		 */
-		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6)
-			new_kmalloc_cache(1, flags);
-		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7)
-			new_kmalloc_cache(2, flags);
+			/*
+			 * Caches that are not of the two-to-the-power-of size.
+			 * These have to be created immediately after the
+			 * earlier power of two caches
+			 */
+			if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
+					!kmalloc_caches[type][1])
+				new_kmalloc_cache(1, type, flags);
+			if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
+					!kmalloc_caches[type][2])
+				new_kmalloc_cache(2, type, flags);
+		}
 	}
 
 	/* Kmalloc array is now usable */