
[v2,1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator

Message ID 20220304063427.372145-2-42.hyeyoo@gmail.com (mailing list archive)
State New
Series slab cleanups

Commit Message

Hyeonggon Yoo March 4, 2022, 6:34 a.m. UTC
There is not much benefit to serving large objects in kmalloc().
Let's pass large requests to the page allocator, as SLUB does, for
better maintenance of common code.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 include/linux/slab.h | 35 ++++++++++++++++-------------------
 mm/slab.c            | 31 +++++++++++++++++++++++++++----
 mm/slab.h            | 19 +++++++++++++++++++
 mm/slub.c            | 19 -------------------
 4 files changed, 62 insertions(+), 42 deletions(-)

Comments

Vlastimil Babka March 4, 2022, 12:45 p.m. UTC | #1
On 3/4/22 07:34, Hyeonggon Yoo wrote:
> There is not much benefit to serving large objects in kmalloc().
> Let's pass large requests to the page allocator, as SLUB does, for
> better maintenance of common code.
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Looks like it's on the right path! Some suggestions follow.

> ---
>  include/linux/slab.h | 35 ++++++++++++++++-------------------
>  mm/slab.c            | 31 +++++++++++++++++++++++++++----
>  mm/slab.h            | 19 +++++++++++++++++++
>  mm/slub.c            | 19 -------------------
>  4 files changed, 62 insertions(+), 42 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 37bde99b74af..e7b3330db4f3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
>   * Kmalloc array related definitions
>   */
>  
> -#ifdef CONFIG_SLAB
>  /*
> - * The largest kmalloc size supported by the SLAB allocators is
> - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> - * less than 32 MB.
> - *
> - * WARNING: Its not easy to increase this value since the allocators have
> - * to do various tricks to work around compiler limitations in order to
> - * ensure proper constant folding.
> + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> + * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
>   */
> -#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> -				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
> -#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
> +#ifdef CONFIG_SLAB
> +#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> +#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)

I think we should be able to also remove the larger sizes from
__kmalloc_index() later in this file.

>  #ifndef KMALLOC_SHIFT_LOW
>  #define KMALLOC_SHIFT_LOW	5
>  #endif
>  #endif
>  
>  #ifdef CONFIG_SLUB
> -/*
> - * SLUB directly allocates requests fitting in to an order-1 page
> - * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> - */
>  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
>  #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
>  #ifndef KMALLOC_SHIFT_LOW
> @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
>   *	Try really hard to succeed the allocation but fail
>   *	eventually.
>   */
> +#ifndef CONFIG_SLOB
>  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
>  {
>  	if (__builtin_constant_p(size)) {
> -#ifndef CONFIG_SLOB
>  		unsigned int index;
> -#endif
> +
>  		if (size > KMALLOC_MAX_CACHE_SIZE)
>  			return kmalloc_large(size, flags);
> -#ifndef CONFIG_SLOB
> +
>  		index = kmalloc_index(size);
>  
>  		if (!index)
> @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
>  		return kmem_cache_alloc_trace(
>  				kmalloc_caches[kmalloc_type(flags)][index],
>  				flags, size);
> -#endif
>  	}
>  	return __kmalloc(size, flags);
>  }
> +#else
> +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> +{
> +	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> +		return kmalloc_large(size, flags);
> +	return __kmalloc(size, flags);
> +}
> +#endif
>  
>  static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
>  {
> diff --git a/mm/slab.c b/mm/slab.c
> index ddf5737c63d9..570af6dc3478 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
>  	void *ret;
>  
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> -		return NULL;
> +		return kmalloc_large(size, flags);

This loses the node information and NUMA locality. It would be better to
generalize kmalloc_large_node() from mm/slub.c instead.

> +
>  	cachep = kmalloc_slab(size, flags);
>  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
>  		return cachep;
> @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	void *ret;
>  
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> -		return NULL;
> +		return kmalloc_large(size, flags);
> +
>  	cachep = kmalloc_slab(size, flags);
>  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
>  		return cachep;
> @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
>  {
>  	struct kmem_cache *s;
>  	size_t i;
> +	struct folio *folio;
>  
>  	local_irq_disable();
>  	for (i = 0; i < size; i++) {
>  		void *objp = p[i];
>  
> -		if (!orig_s) /* called via kfree_bulk */
> +		if (!orig_s) {
> +			/* called via kfree_bulk */
> +			folio = virt_to_folio(objp);
> +			if (unlikely(!folio_test_slab(folio))) {
> +				free_large_kmalloc(folio, objp);

Hmm, I'm not sure it's a good idea to call this (and the page allocator)
with IRQs disabled. Maybe simply re-enable them around it?

> +				continue;
> +			}
>  			s = virt_to_cache(objp);

Since we already have the folio, we should instead use folio_slab and
slab->slab_cache (see how mm/slub.c does it).
The same applies to functions below as well.

> -		else
> +		} else
>  			s = cache_from_obj(orig_s, objp);
>  		if (!s)
>  			continue;
> @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
>  {
>  	struct kmem_cache *c;
>  	unsigned long flags;
> +	struct folio *folio;
> +	void *object = (void *) objp;
>  
>  	trace_kfree(_RET_IP_, objp);
>  
>  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
>  		return;
> +
> +	folio = virt_to_folio(objp);
> +	if (unlikely(!folio_test_slab(folio))) {
> +		free_large_kmalloc(folio, object);
> +		return;
> +	}
> +
>  	local_irq_save(flags);
>  	kfree_debugcheck(objp);
>  	c = virt_to_cache(objp);
> @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
>  size_t __ksize(const void *objp)
>  {
>  	struct kmem_cache *c;
> +	struct folio *folio;
>  	size_t size;
>  
>  	BUG_ON(!objp);
>  	if (unlikely(objp == ZERO_SIZE_PTR))
>  		return 0;
>  
> +	folio = virt_to_folio(objp);
> +	if (!folio_test_slab(folio))
> +		return folio_size(folio);
> +
>  	c = virt_to_cache(objp);
>  	size = c ? c->object_size : 0;
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index c7f2abc2b154..31e98beb47a3 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
>  		print_tracking(cachep, x);
>  	return cachep;
>  }
> +
> +static __always_inline void kfree_hook(void *x)
> +{
> +	kmemleak_free(x);
> +	kasan_kfree_large(x);

Looks like there's only one caller of kfree_hook() so we could do that
directly there.

> +}
> +
> +static inline void free_large_kmalloc(struct folio *folio, void *object)
> +{
> +	unsigned int order = folio_order(folio);
> +
> +	if (WARN_ON_ONCE(order == 0))
> +		pr_warn_once("object pointer: 0x%p\n", object);
> +
> +	kfree_hook(object);
> +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> +			      -(PAGE_SIZE << order));
> +	__free_pages(folio_page(folio, 0), order);
> +}
>  #endif /* CONFIG_SLOB */
>  
>  static inline size_t slab_ksize(const struct kmem_cache *s)
> diff --git a/mm/slub.c b/mm/slub.c
> index 261474092e43..04fd084f4709 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
>  	return ptr;
>  }
>  
> -static __always_inline void kfree_hook(void *x)
> -{
> -	kmemleak_free(x);
> -	kasan_kfree_large(x);
> -}
> -
>  static __always_inline bool slab_free_hook(struct kmem_cache *s,
>  						void *x, bool init)
>  {
> @@ -3535,19 +3529,6 @@ struct detached_freelist {
>  	struct kmem_cache *s;
>  };
>  
> -static inline void free_large_kmalloc(struct folio *folio, void *object)
> -{
> -	unsigned int order = folio_order(folio);
> -
> -	if (WARN_ON_ONCE(order == 0))
> -		pr_warn_once("object pointer: 0x%p\n", object);
> -
> -	kfree_hook(object);
> -	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> -			      -(PAGE_SIZE << order));
> -	__free_pages(folio_page(folio, 0), order);
> -}
> -
>  /*
>   * This function progressively scans the array with free objects (with
>   * a limited look ahead) and extract objects belonging to the same
Hyeonggon Yoo March 5, 2022, 5:10 a.m. UTC | #2
On Fri, Mar 04, 2022 at 01:45:16PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > There is not much benefit to serving large objects in kmalloc().
> > Let's pass large requests to the page allocator, as SLUB does, for
> > better maintenance of common code.
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Looks like it's on the right path! Some suggestions follow.
> 

Thank you for reviewing this!
You have a really good eye.

> > ---
> >  include/linux/slab.h | 35 ++++++++++++++++-------------------
> >  mm/slab.c            | 31 +++++++++++++++++++++++++++----
> >  mm/slab.h            | 19 +++++++++++++++++++
> >  mm/slub.c            | 19 -------------------
> >  4 files changed, 62 insertions(+), 42 deletions(-)
> > 
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 37bde99b74af..e7b3330db4f3 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
> >   * Kmalloc array related definitions
> >   */
> >  
> > -#ifdef CONFIG_SLAB
> >  /*
> > - * The largest kmalloc size supported by the SLAB allocators is
> > - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> > - * less than 32 MB.
> > - *
> > - * WARNING: Its not easy to increase this value since the allocators have
> > - * to do various tricks to work around compiler limitations in order to
> > - * ensure proper constant folding.
> > + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> > + * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> >   */
> > -#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> > -				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
> > -#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
> > +#ifdef CONFIG_SLAB
> > +#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> > +#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> 
> I think we should be able to also remove the larger sizes from
> __kmalloc_index() later in this file.
>

Right. We can remove the sizes above 1 << (max(PAGE_SHIFT) + 1).

Searching with cscope, the maximum PAGE_SHIFT is 20 (1MB), on hexagon,
so I think we can remove anything larger than 2MB.

Hmm, I'll also add something like static_assert(PAGE_SHIFT <= 20) and
see if any architecture/config complains about it.
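
Roughly like this, I guess (just a sketch, not compile-tested):

	/* in include/linux/slab.h, near the KMALLOC_SHIFT_* definitions */
	static_assert(PAGE_SHIFT <= 20);

and then the tail of __kmalloc_index() would stop at 2MB:

	if (size <=  1 * 1024 * 1024) return 20;
	if (size <=  2 * 1024 * 1024) return 21;
	/* the entries above 2MB can simply be deleted, since
	 * KMALLOC_SHIFT_HIGH is now at most PAGE_SHIFT + 1 == 21 */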

> >  #ifndef KMALLOC_SHIFT_LOW
> >  #define KMALLOC_SHIFT_LOW	5
> >  #endif
> >  #endif
> >  
> >  #ifdef CONFIG_SLUB
> > -/*
> > - * SLUB directly allocates requests fitting in to an order-1 page
> > - * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> > - */
> >  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> >  #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> >  #ifndef KMALLOC_SHIFT_LOW
> > @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
> >   *	Try really hard to succeed the allocation but fail
> >   *	eventually.
> >   */
> > +#ifndef CONFIG_SLOB
> >  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  {
> >  	if (__builtin_constant_p(size)) {
> > -#ifndef CONFIG_SLOB
> >  		unsigned int index;
> > -#endif
> > +
> >  		if (size > KMALLOC_MAX_CACHE_SIZE)
> >  			return kmalloc_large(size, flags);
> > -#ifndef CONFIG_SLOB
> > +
> >  		index = kmalloc_index(size);
> >  
> >  		if (!index)
> > @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  		return kmem_cache_alloc_trace(
> >  				kmalloc_caches[kmalloc_type(flags)][index],
> >  				flags, size);
> > -#endif
> >  	}
> >  	return __kmalloc(size, flags);
> >  }
> > +#else
> > +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > +{
> > +	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> > +		return kmalloc_large(size, flags);
> > +	return __kmalloc(size, flags);
> > +}
> > +#endif
> >  
> >  static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> >  {
> > diff --git a/mm/slab.c b/mm/slab.c
> > index ddf5737c63d9..570af6dc3478 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> 
> This loses the node information and NUMA locality.

Oh, my fault. Thank you for catching this.

> It would be better to generalize kmalloc_large_node() from mm/slub.c instead.

Sounds nice.
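
Maybe roughly like this (untested sketch, wherever the generalized helper
ends up living), based on what SLUB's kmalloc_large_node() already does:

	static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
	{
		struct page *page;
		void *ptr = NULL;
		unsigned int order = get_order(size);

		flags |= __GFP_COMP;
		page = alloc_pages_node(node, flags, order);
		if (page) {
			ptr = page_address(page);
			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
					      PAGE_SIZE << order);
		}
		/* plus the kmemleak/kasan handling SLUB does in
		 * kmalloc_large_node_hook() */
		return ptr;
	}

so that __do_kmalloc_node() can do:

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large_node(size, flags, node);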

> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> >  {
> >  	struct kmem_cache *s;
> >  	size_t i;
> > +	struct folio *folio;
> >  
> >  	local_irq_disable();
> >  	for (i = 0; i < size; i++) {
> >  		void *objp = p[i];
> >  
> > -		if (!orig_s) /* called via kfree_bulk */
> > +		if (!orig_s) {
> > +			/* called via kfree_bulk */
> > +			folio = virt_to_folio(objp);
> > +			if (unlikely(!folio_test_slab(folio))) {
> > +				free_large_kmalloc(folio, objp);
> 
> Hmm, I'm not sure it's a good idea to call this (and the page allocator)
> with IRQs disabled. Maybe simply re-enable them around it?

Seems reasonable. Re-enabling IRQs around it will be sufficient for this
function.

By the way, it seems SLAB does not really care about disabling IRQs
even when it's unnecessary.

I would like to try reducing the irq-disabled areas in SLAB and see how
that affects macro/micro benchmarks after this series is done :)
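
For kmem_cache_free_bulk() that would be something like (rough sketch):

		if (unlikely(!folio_test_slab(folio))) {
			/* don't call the page allocator with IRQs disabled */
			local_irq_enable();
			free_large_kmalloc(folio, objp);
			local_irq_disable();
			continue;
		}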

> > +				continue;
> > +			}
> >  			s = virt_to_cache(objp);
> 
> Since we already have the folio, we should instead use folio_slab and
> slab->slab_cache (see how mm/slub.c does it).
> The same applies to functions below as well.

Right. I'll change them in v3.
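
i.e. something like this for the kfree_bulk case (sketch):

			folio = virt_to_folio(objp);
			if (unlikely(!folio_test_slab(folio))) {
				free_large_kmalloc(folio, objp);
				continue;
			}
			s = folio_slab(folio)->slab_cache;

and the same folio_slab(folio)->slab_cache lookup instead of
virt_to_cache() in kfree() and __ksize(), where we already have the folio.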

> > -		else
> > +		} else
> >  			s = cache_from_obj(orig_s, objp);
> >  		if (!s)
> >  			continue;
> > @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> >  	unsigned long flags;
> > +	struct folio *folio;
> > +	void *object = (void *) objp;
> >  
> >  	trace_kfree(_RET_IP_, objp);
> >  
> >  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
> >  		return;
> > +
> > +	folio = virt_to_folio(objp);
> > +	if (unlikely(!folio_test_slab(folio))) {
> > +		free_large_kmalloc(folio, object);
> > +		return;
> > +	}
> > +
> >  	local_irq_save(flags);
> >  	kfree_debugcheck(objp);
> >  	c = virt_to_cache(objp);
> > @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
> >  size_t __ksize(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> > +	struct folio *folio;
> >  	size_t size;
> >  
> >  	BUG_ON(!objp);
> >  	if (unlikely(objp == ZERO_SIZE_PTR))
> >  		return 0;
> >  
> > +	folio = virt_to_folio(objp);
> > +	if (!folio_test_slab(folio))
> > +		return folio_size(folio);
> > +
> >  	c = virt_to_cache(objp);
> >  	size = c ? c->object_size : 0;
> >  
> > diff --git a/mm/slab.h b/mm/slab.h
> > index c7f2abc2b154..31e98beb47a3 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
> >  		print_tracking(cachep, x);
> >  	return cachep;
> >  }
> > +
> > +static __always_inline void kfree_hook(void *x)
> > +{
> > +	kmemleak_free(x);
> > +	kasan_kfree_large(x);
> 
> Looks like there's only one caller of kfree_hook() so we could do that
> directly there.
>

Right.
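
So free_large_kmalloc() would just call the hooks directly, something like:

	static inline void free_large_kmalloc(struct folio *folio, void *object)
	{
		unsigned int order = folio_order(folio);

		if (WARN_ON_ONCE(order == 0))
			pr_warn_once("object pointer: 0x%p\n", object);

		kmemleak_free(object);
		kasan_kfree_large(object);
		mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
				      -(PAGE_SIZE << order));
		__free_pages(folio_page(folio, 0), order);
	}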

> > +}
> > +
> > +static inline void free_large_kmalloc(struct folio *folio, void *object)
> > +{
> > +	unsigned int order = folio_order(folio);
> > +
> > +	if (WARN_ON_ONCE(order == 0))
> > +		pr_warn_once("object pointer: 0x%p\n", object);
> > +
> > +	kfree_hook(object);
> > +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > +			      -(PAGE_SIZE << order));
> > +	__free_pages(folio_page(folio, 0), order);
> > +}
> >  #endif /* CONFIG_SLOB */
> >  
> >  static inline size_t slab_ksize(const struct kmem_cache *s)
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 261474092e43..04fd084f4709 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
> >  	return ptr;
> >  }
> >  
> > -static __always_inline void kfree_hook(void *x)
> > -{
> > -	kmemleak_free(x);
> > -	kasan_kfree_large(x);
> > -}
> > -
> >  static __always_inline bool slab_free_hook(struct kmem_cache *s,
> >  						void *x, bool init)
> >  {
> > @@ -3535,19 +3529,6 @@ struct detached_freelist {
> >  	struct kmem_cache *s;
> >  };
> >  
> > -static inline void free_large_kmalloc(struct folio *folio, void *object)
> > -{
> > -	unsigned int order = folio_order(folio);
> > -
> > -	if (WARN_ON_ONCE(order == 0))
> > -		pr_warn_once("object pointer: 0x%p\n", object);
> > -
> > -	kfree_hook(object);
> > -	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > -			      -(PAGE_SIZE << order));
> > -	__free_pages(folio_page(folio, 0), order);
> > -}
> > -
> >  /*
> >   * This function progressively scans the array with free objects (with
> >   * a limited look ahead) and extract objects belonging to the same
>

Thank you so much!

Patch

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 37bde99b74af..e7b3330db4f3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -224,29 +224,19 @@  void kmem_dump_obj(void *object);
  * Kmalloc array related definitions
  */
 
-#ifdef CONFIG_SLAB
 /*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocates requests fitting in to an order-1 page
+ * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
-#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
+#ifdef CONFIG_SLAB
+#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
 #endif
 
 #ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
- */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
 #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
@@ -564,15 +554,15 @@  static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
  *	Try really hard to succeed the allocation but fail
  *	eventually.
  */
+#ifndef CONFIG_SLOB
 static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-#ifndef CONFIG_SLOB
 		unsigned int index;
-#endif
+
 		if (size > KMALLOC_MAX_CACHE_SIZE)
 			return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
+
 		index = kmalloc_index(size);
 
 		if (!index)
@@ -581,10 +571,17 @@  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 		return kmem_cache_alloc_trace(
 				kmalloc_caches[kmalloc_type(flags)][index],
 				flags, size);
-#endif
 	}
 	return __kmalloc(size, flags);
 }
+#else
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+{
+	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+		return kmalloc_large(size, flags);
+	return __kmalloc(size, flags);
+}
+#endif
 
 static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..570af6dc3478 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3624,7 +3624,8 @@  __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 	void *ret;
 
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+		return kmalloc_large(size, flags);
+
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
@@ -3685,7 +3686,8 @@  static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	void *ret;
 
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+		return kmalloc_large(size, flags);
+
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
@@ -3739,14 +3741,21 @@  void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
 {
 	struct kmem_cache *s;
 	size_t i;
+	struct folio *folio;
 
 	local_irq_disable();
 	for (i = 0; i < size; i++) {
 		void *objp = p[i];
 
-		if (!orig_s) /* called via kfree_bulk */
+		if (!orig_s) {
+			/* called via kfree_bulk */
+			folio = virt_to_folio(objp);
+			if (unlikely(!folio_test_slab(folio))) {
+				free_large_kmalloc(folio, objp);
+				continue;
+			}
 			s = virt_to_cache(objp);
-		else
+		} else
 			s = cache_from_obj(orig_s, objp);
 		if (!s)
 			continue;
@@ -3776,11 +3785,20 @@  void kfree(const void *objp)
 {
 	struct kmem_cache *c;
 	unsigned long flags;
+	struct folio *folio;
+	void *object = (void *) objp;
 
 	trace_kfree(_RET_IP_, objp);
 
 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
 		return;
+
+	folio = virt_to_folio(objp);
+	if (unlikely(!folio_test_slab(folio))) {
+		free_large_kmalloc(folio, object);
+		return;
+	}
+
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
@@ -4211,12 +4229,17 @@  void __check_heap_object(const void *ptr, unsigned long n,
 size_t __ksize(const void *objp)
 {
 	struct kmem_cache *c;
+	struct folio *folio;
 	size_t size;
 
 	BUG_ON(!objp);
 	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio))
+		return folio_size(folio);
+
 	c = virt_to_cache(objp);
 	size = c ? c->object_size : 0;
 
diff --git a/mm/slab.h b/mm/slab.h
index c7f2abc2b154..31e98beb47a3 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -664,6 +664,25 @@  static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 		print_tracking(cachep, x);
 	return cachep;
 }
+
+static __always_inline void kfree_hook(void *x)
+{
+	kmemleak_free(x);
+	kasan_kfree_large(x);
+}
+
+static inline void free_large_kmalloc(struct folio *folio, void *object)
+{
+	unsigned int order = folio_order(folio);
+
+	if (WARN_ON_ONCE(order == 0))
+		pr_warn_once("object pointer: 0x%p\n", object);
+
+	kfree_hook(object);
+	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+			      -(PAGE_SIZE << order));
+	__free_pages(folio_page(folio, 0), order);
+}
 #endif /* CONFIG_SLOB */
 
 static inline size_t slab_ksize(const struct kmem_cache *s)
diff --git a/mm/slub.c b/mm/slub.c
index 261474092e43..04fd084f4709 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1686,12 +1686,6 @@  static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 	return ptr;
 }
 
-static __always_inline void kfree_hook(void *x)
-{
-	kmemleak_free(x);
-	kasan_kfree_large(x);
-}
-
 static __always_inline bool slab_free_hook(struct kmem_cache *s,
 						void *x, bool init)
 {
@@ -3535,19 +3529,6 @@  struct detached_freelist {
 	struct kmem_cache *s;
 };
 
-static inline void free_large_kmalloc(struct folio *folio, void *object)
-{
-	unsigned int order = folio_order(folio);
-
-	if (WARN_ON_ONCE(order == 0))
-		pr_warn_once("object pointer: 0x%p\n", object);
-
-	kfree_hook(object);
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
-}
-
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same