diff mbox series

[08/10] mm/slab: Allow dynamic kmalloc() minimum alignment

Message ID 20220405135758.774016-9-catalin.marinas@arm.com (mailing list archive)
State New
Headers show
Series mm, arm64: Reduce ARCH_KMALLOC_MINALIGN below the cache line size | expand

Commit Message

Catalin Marinas April 5, 2022, 1:57 p.m. UTC
ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
alignment but an architecture may require a larger run-time alignment.
Do not create kmalloc caches smaller than arch_kmalloc_minalign().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/slab.h |  2 ++
 mm/slab.c            |  6 +-----
 mm/slab.h            |  2 ++
 mm/slab_common.c     | 33 +++++++++++++++++++++++----------
 4 files changed, 28 insertions(+), 15 deletions(-)

Comments

Hyeonggon Yoo April 7, 2022, 3:46 a.m. UTC | #1
On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
> alignment but an architecture may require a larger run-time alignment.
> Do not create kmalloc caches smaller than arch_kmalloc_minalign().
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> ---
>  include/linux/slab.h |  2 ++
>  mm/slab.c            |  6 +-----
>  mm/slab.h            |  2 ++
>  mm/slab_common.c     | 33 +++++++++++++++++++++++----------
>  4 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index d58211bdeceb..2137dba85691 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -332,6 +332,8 @@ enum kmalloc_cache_type {
>  extern struct kmem_cache *
>  kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>  
> +unsigned int arch_kmalloc_minalign(void);
> +
>  /*
>   * Define gfp bits that should not be set for KMALLOC_NORMAL.
>   */
> diff --git a/mm/slab.c b/mm/slab.c
> index b04e40078bdf..4aaeeb9c994d 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1256,11 +1256,7 @@ void __init kmem_cache_init(void)
>  	 * Initialize the caches that provide memory for the  kmem_cache_node
>  	 * structures first.  Without this, further allocations will bug.
>  	 */
> -	kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
> -				kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
> -				kmalloc_info[INDEX_NODE].size,
> -				ARCH_KMALLOC_FLAGS, 0,
> -				kmalloc_info[INDEX_NODE].size);
> +	new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
>  	slab_state = PARTIAL_NODE;
>  	setup_kmalloc_cache_index_table();
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index fd7ae2024897..e9238406602a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -283,6 +283,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
>  struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
>  			slab_flags_t flags, unsigned int useroffset,
>  			unsigned int usersize);
> +void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
> +			      slab_flags_t flags);
>  extern void create_boot_cache(struct kmem_cache *, const char *name,
>  			unsigned int size, slab_flags_t flags,
>  			unsigned int useroffset, unsigned int usersize);
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6ee64d6208b3..594d8a8a68d0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
>  	}
>  }
>  
> -static void __init
> +unsigned int __weak arch_kmalloc_minalign(void)
> +{
> +	return ARCH_KMALLOC_MINALIGN;
> +}
> +

As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
for every user of it would be more correct?

> +void __init
>  new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
>  {
> +	unsigned int minalign = arch_kmalloc_minalign();
> +	unsigned int aligned_size = kmalloc_info[idx].size;
> +	int aligned_idx = idx;
> +
>  	if (type == KMALLOC_RECLAIM) {
>  		flags |= SLAB_RECLAIM_ACCOUNT;
>  	} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
> @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
>  		flags |= SLAB_ACCOUNT;
>  	}
>  
> -	kmalloc_caches[type][idx] = create_kmalloc_cache(
> -					kmalloc_info[idx].name[type],
> -					kmalloc_info[idx].size, flags, 0,
> -					kmalloc_info[idx].size);
> +	if (minalign > ARCH_KMALLOC_MINALIGN) {
> +		aligned_size = ALIGN(aligned_size, minalign);
> +		aligned_idx = __kmalloc_index(aligned_size, false);
> +	}
> +
> +	if (!kmalloc_caches[type][aligned_idx])
> +		kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> +					kmalloc_info[aligned_idx].name[type],
> +					aligned_size, flags, 0, aligned_size);
> +	if (idx != aligned_idx)
> +		kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];

I would prefer detecting minimum kmalloc size in create_kmalloc_caches()
in runtime instead of changing behavior of new_kmalloc_cache().

>  	/*
>  	 * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
> @@ -904,11 +920,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
>  		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
>  
>  		if (s) {
> -			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
> -				kmalloc_info[i].name[KMALLOC_DMA],
> -				kmalloc_info[i].size,
> -				SLAB_CACHE_DMA | flags, 0,
> -				kmalloc_info[i].size);
> +			new_kmalloc_cache(i, KMALLOC_DMA,
> +					  SLAB_CACHE_DMA | flags);
>  		}
>  	}
>  #endif
Catalin Marinas April 7, 2022, 8:50 a.m. UTC | #2
On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> >  	}
> >  }
> >  
> > -static void __init
> > +unsigned int __weak arch_kmalloc_minalign(void)
> > +{
> > +	return ARCH_KMALLOC_MINALIGN;
> > +}
> > +
> 
> As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> for every user of it would be more correct?

Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
Yes, there probably are a few places where the code can cope with a
dynamic arch_kmalloc_minalign() but there are two other cases where a
constant is needed:

1. As a BUILD_BUG check because the code is storing some flags in the
   bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
   fine here.

2. As a static alignment for DMA requirements. That's where the newly
   exposed ARCH_DMA_MINALIGN should be used.

Note that this series doesn't make the situation any worse than before
since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
evolve to use a dynamic alignment in future patches. My main aim with
this series is to be able to create kmalloc-64 caches on arm64.

> > @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> >  		flags |= SLAB_ACCOUNT;
> >  	}
> >  
> > -	kmalloc_caches[type][idx] = create_kmalloc_cache(
> > -					kmalloc_info[idx].name[type],
> > -					kmalloc_info[idx].size, flags, 0,
> > -					kmalloc_info[idx].size);
> > +	if (minalign > ARCH_KMALLOC_MINALIGN) {
> > +		aligned_size = ALIGN(aligned_size, minalign);
> > +		aligned_idx = __kmalloc_index(aligned_size, false);
> > +	}
> > +
> > +	if (!kmalloc_caches[type][aligned_idx])
> > +		kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> > +					kmalloc_info[aligned_idx].name[type],
> > +					aligned_size, flags, 0, aligned_size);
> > +	if (idx != aligned_idx)
> > +		kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
> 
> I would prefer detecting minimum kmalloc size in create_kmalloc_caches()
> in runtime instead of changing behavior of new_kmalloc_cache().

That was my initial attempt but we have a couple of
create_kmalloc_cache() (not *_caches) calls directly, one of them in
mm/slab.c kmem_cache_init(). So I wanted all the minalign logic in a
single place, hence I replaced the explicit create_kmalloc_cache() call
with new_kmalloc_cache(). See this patch and patch 9 for some clean-up.
Hyeonggon Yoo April 7, 2022, 9:18 a.m. UTC | #3
On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > --- a/mm/slab_common.c
> > > +++ b/mm/slab_common.c
> > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > >  	}
> > >  }
> > >  
> > > -static void __init
> > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > +{
> > > +	return ARCH_KMALLOC_MINALIGN;
> > > +}
> > > +
> > 
> > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > for every user of it would be more correct?
> 
> Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> Yes, there probably are a few places where the code can cope with a
> dynamic arch_kmalloc_minalign() but there are two other cases where a
> constant is needed:
> 
> 1. As a BUILD_BUG check because the code is storing some flags in the
>    bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
>    fine here.
> 
> 2. As a static alignment for DMA requirements. That's where the newly
>    exposed ARCH_DMA_MINALIGN should be used.
> 
> Note that this series doesn't make the situation any worse than before
> since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> evolve to use a dynamic alignment in future patches. My main aim with
> this series is to be able to create kmalloc-64 caches on arm64.

AFAIK there are bunch of drivers that directly calls kmalloc().
It becomes tricky when e.g.) a driver allocates just 32 bytes,
but architecture requires it to be 128-byte aligned.

That's why everything allocated from kmalloc() need to be aligned in
ARCH_DMA_MINALIGN. And It's too hard to update all of drivers
that depends on this fact.

So I'm yet skeptical on decoupling ARCH_DMA/KMALLOC_MINALIGN.
Instead of decoupling it, I'm more into dynamically decreasing it.

Please kindly let me know If I'm missing something ;-)

> > > @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
> > >  		flags |= SLAB_ACCOUNT;
> > >  	}
> > >  
> > > -	kmalloc_caches[type][idx] = create_kmalloc_cache(
> > > -					kmalloc_info[idx].name[type],
> > > -					kmalloc_info[idx].size, flags, 0,
> > > -					kmalloc_info[idx].size);
> > > +	if (minalign > ARCH_KMALLOC_MINALIGN) {
> > > +		aligned_size = ALIGN(aligned_size, minalign);
> > > +		aligned_idx = __kmalloc_index(aligned_size, false);
> > > +	}
> > > +
> > > +	if (!kmalloc_caches[type][aligned_idx])
> > > +		kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> > > +					kmalloc_info[aligned_idx].name[type],
> > > +					aligned_size, flags, 0, aligned_size);
> > > +	if (idx != aligned_idx)
> > > +		kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
> > 
> > I would prefer detecting minimum kmalloc size in create_kmalloc_caches()
> > in runtime instead of changing behavior of new_kmalloc_cache().
> 
> That was my initial attempt but we have a couple of
> create_kmalloc_cache() (not *_caches) calls directly, one of them in
> mm/slab.c kmem_cache_init(). So I wanted all the minalign logic in a
> single place, hence I replaced the explicit create_kmalloc_cache() call
> with new_kmalloc_cache(). See this patch and patch 9 for some clean-up.
> 
> -- 
> Catalin
Catalin Marinas April 7, 2022, 9:35 a.m. UTC | #4
On Thu, Apr 07, 2022 at 06:18:16PM +0900, Hyeonggon Yoo wrote:
> On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> > On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > > --- a/mm/slab_common.c
> > > > +++ b/mm/slab_common.c
> > > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > > >  	}
> > > >  }
> > > >  
> > > > -static void __init
> > > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > > +{
> > > > +	return ARCH_KMALLOC_MINALIGN;
> > > > +}
> > > > +
> > > 
> > > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > > for every user of it would be more correct?
> > 
> > Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> > Yes, there probably are a few places where the code can cope with a
> > dynamic arch_kmalloc_minalign() but there are two other cases where a
> > constant is needed:
> > 
> > 1. As a BUILD_BUG check because the code is storing some flags in the
> >    bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
> >    fine here.
> > 
> > 2. As a static alignment for DMA requirements. That's where the newly
> >    exposed ARCH_DMA_MINALIGN should be used.
> > 
> > Note that this series doesn't make the situation any worse than before
> > since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> > evolve to use a dynamic alignment in future patches. My main aim with
> > this series is to be able to create kmalloc-64 caches on arm64.
> 
> AFAIK there are bunch of drivers that directly calls kmalloc().

Well, lots of drivers call kmalloc() ;).

> It becomes tricky when e.g.) a driver allocates just 32 bytes,
> but architecture requires it to be 128-byte aligned.

That's the current behaviour, a 32 byte allocation would return an
object from kmalloc-128. I want to reduce this to at least kmalloc-64
(or smaller) if the CPU/SoC allows it.

> That's why everything allocated from kmalloc() need to be aligned in
> ARCH_DMA_MINALIGN.

I don't get your conclusion here. Would you mind explaining?

> So I'm yet skeptical on decoupling ARCH_DMA/KMALLOC_MINALIGN. Instead
> of decoupling it, I'm more into dynamically decreasing it.

The reason for decoupling is mostly that there are some static uses of
ARCH_KMALLOC_MINALIGN as per point 1 above. The other is the
__assume_kmalloc_alignment attribute. We shouldn't have such assumed
alignment larger than what a dynamic kmalloc() would return. To me it
makes a lot more sense for ARCH_KMALLOC_MINALIGN to be the minimum
guaranteed in a kernel build but kmalloc() returning a larger alignment
at run-time than the other way around.

Thanks.
Hyeonggon Yoo April 7, 2022, 12:26 p.m. UTC | #5
On Thu, Apr 07, 2022 at 10:35:04AM +0100, Catalin Marinas wrote:
> On Thu, Apr 07, 2022 at 06:18:16PM +0900, Hyeonggon Yoo wrote:
> > On Thu, Apr 07, 2022 at 09:50:23AM +0100, Catalin Marinas wrote:
> > > On Thu, Apr 07, 2022 at 03:46:37AM +0000, Hyeonggon Yoo wrote:
> > > > On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> > > > > --- a/mm/slab_common.c
> > > > > +++ b/mm/slab_common.c
> > > > > @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
> > > > >  	}
> > > > >  }
> > > > >  
> > > > > -static void __init
> > > > > +unsigned int __weak arch_kmalloc_minalign(void)
> > > > > +{
> > > > > +	return ARCH_KMALLOC_MINALIGN;
> > > > > +}
> > > > > +
> > > > 
> > > > As ARCH_KMALLOC_ALIGN and arch_kmalloc_minalign() may not be same after
> > > > patch 10, I think s/ARCH_KMALLOC_ALIGN/arch_kmalloc_minalign/g
> > > > for every user of it would be more correct?
> > > 
> > > Not if the code currently using ARCH_KMALLOC_MINALIGN needs a constant.
> > > Yes, there probably are a few places where the code can cope with a
> > > dynamic arch_kmalloc_minalign() but there are two other cases where a
> > > constant is needed:
> > > 
> > > 1. As a BUILD_BUG check because the code is storing some flags in the
> > >    bottom bits of a pointer. A smaller ARCH_KMALLOC_MINALIGN works just
> > >    fine here.
> > > 
> > > 2. As a static alignment for DMA requirements. That's where the newly
> > >    exposed ARCH_DMA_MINALIGN should be used.
> > > 
> > > Note that this series doesn't make the situation any worse than before
> > > since ARCH_DMA_MINALIGN stays at 128 bytes for arm64. Current users can
> > > evolve to use a dynamic alignment in future patches. My main aim with
> > > this series is to be able to create kmalloc-64 caches on arm64.
> > 
> > AFAIK there are bunch of drivers that directly calls kmalloc().
> 
> Well, lots of drivers call kmalloc() ;).
> 
> > It becomes tricky when e.g.) a driver allocates just 32 bytes,
> > but architecture requires it to be 128-byte aligned.
> 
> That's the current behaviour, a 32 byte allocation would return an
> object from kmalloc-128. I want to reduce this to at least kmalloc-64
> (or smaller) if the CPU/SoC allows it.

Yeah I agree the change is worth :) Thanks for the work.

> > That's why everything allocated from kmalloc() need to be aligned in
> > ARCH_DMA_MINALIGN.
> 
> I don't get your conclusion here. Would you mind explaining?

What I wanted to say was that, why ARCH_DMA_MINALIGN should be
different from ARCH_KMALLOC_MINALIGN.

I thought the two were basically same thing. Instead of
decoupling them, I thought just decreasing them in runtime makes more sense.

> > So I'm yet skeptical on decoupling ARCH_DMA/KMALLOC_MINALIGN. Instead
> > of decoupling it, I'm more into dynamically decreasing it.
> 
> The reason for decoupling is mostly that there are some static uses of
> ARCH_KMALLOC_MINALIGN as per point 1 above. The other is the
> __assume_kmalloc_alignment attribute. We shouldn't have such assumed
> alignment larger than what a dynamic kmalloc() would return. To me it
> makes a lot more sense for ARCH_KMALLOC_MINALIGN to be the minimum
> guaranteed in a kernel build but kmalloc() returning a larger alignment
> at run-time than the other way around.

But yeah, considering the problems you mentioned, it seems unavoidable
to decouple them.

Thank you for explanation and I will review slab part soon.

> Thanks.
> 
> -- 
> Catalin
Hyeonggon Yoo April 11, 2022, 11:55 a.m. UTC | #6
On Tue, Apr 05, 2022 at 02:57:56PM +0100, Catalin Marinas wrote:
> ARCH_KMALLOC_MINALIGN represents the minimum guaranteed kmalloc()
> alignment but an architecture may require a larger run-time alignment.
> Do not create kmalloc caches smaller than arch_kmalloc_minalign().
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> ---
>  include/linux/slab.h |  2 ++
>  mm/slab.c            |  6 +-----
>  mm/slab.h            |  2 ++
>  mm/slab_common.c     | 33 +++++++++++++++++++++++----------
>  4 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index d58211bdeceb..2137dba85691 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -332,6 +332,8 @@ enum kmalloc_cache_type {
>  extern struct kmem_cache *
>  kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
>  
> +unsigned int arch_kmalloc_minalign(void);
> +
>  /*
>   * Define gfp bits that should not be set for KMALLOC_NORMAL.
>   */
> diff --git a/mm/slab.c b/mm/slab.c
> index b04e40078bdf..4aaeeb9c994d 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1256,11 +1256,7 @@ void __init kmem_cache_init(void)
>  	 * Initialize the caches that provide memory for the  kmem_cache_node
>  	 * structures first.  Without this, further allocations will bug.
>  	 */
> -	kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
> -				kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
> -				kmalloc_info[INDEX_NODE].size,
> -				ARCH_KMALLOC_FLAGS, 0,
> -				kmalloc_info[INDEX_NODE].size);
> +	new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
>  	slab_state = PARTIAL_NODE;
>  	setup_kmalloc_cache_index_table();
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index fd7ae2024897..e9238406602a 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -283,6 +283,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
>  struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
>  			slab_flags_t flags, unsigned int useroffset,
>  			unsigned int usersize);
> +void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
> +			      slab_flags_t flags);
>  extern void create_boot_cache(struct kmem_cache *, const char *name,
>  			unsigned int size, slab_flags_t flags,
>  			unsigned int useroffset, unsigned int usersize);
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6ee64d6208b3..594d8a8a68d0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -838,9 +838,18 @@ void __init setup_kmalloc_cache_index_table(void)
>  	}
>  }
>  
> -static void __init
> +unsigned int __weak arch_kmalloc_minalign(void)
> +{
> +	return ARCH_KMALLOC_MINALIGN;
> +}
> +
> +void __init
>  new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
>  {
> +	unsigned int minalign = arch_kmalloc_minalign();
> +	unsigned int aligned_size = kmalloc_info[idx].size;
> +	int aligned_idx = idx;
> +
>  	if (type == KMALLOC_RECLAIM) {
>  		flags |= SLAB_RECLAIM_ACCOUNT;
>  	} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
> @@ -851,10 +860,17 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
>  		flags |= SLAB_ACCOUNT;
>  	}
>  
> -	kmalloc_caches[type][idx] = create_kmalloc_cache(
> -					kmalloc_info[idx].name[type],
> -					kmalloc_info[idx].size, flags, 0,
> -					kmalloc_info[idx].size);
> +	if (minalign > ARCH_KMALLOC_MINALIGN) {
> +		aligned_size = ALIGN(aligned_size, minalign);
> +		aligned_idx = __kmalloc_index(aligned_size, false);
> +	}
> +
> +	if (!kmalloc_caches[type][aligned_idx])
> +		kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
> +					kmalloc_info[aligned_idx].name[type],
> +					aligned_size, flags, 0, aligned_size);
> +	if (idx != aligned_idx)
> +		kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
>  
>  	/*
>  	 * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
> @@ -904,11 +920,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
>  		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
>  
>  		if (s) {
> -			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
> -				kmalloc_info[i].name[KMALLOC_DMA],
> -				kmalloc_info[i].size,
> -				SLAB_CACHE_DMA | flags, 0,
> -				kmalloc_info[i].size);
> +			new_kmalloc_cache(i, KMALLOC_DMA,
> +					  SLAB_CACHE_DMA | flags);
>  		}
>  	}
>  #endif

Looks good to me.

Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

And works fine with SLAB/SLUB/SLOB on my arm64 machine.

Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Thanks!
diff mbox series

Patch

diff --git a/include/linux/slab.h b/include/linux/slab.h
index d58211bdeceb..2137dba85691 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -332,6 +332,8 @@  enum kmalloc_cache_type {
 extern struct kmem_cache *
 kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 
+unsigned int arch_kmalloc_minalign(void);
+
 /*
  * Define gfp bits that should not be set for KMALLOC_NORMAL.
  */
diff --git a/mm/slab.c b/mm/slab.c
index b04e40078bdf..4aaeeb9c994d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1256,11 +1256,7 @@  void __init kmem_cache_init(void)
 	 * Initialize the caches that provide memory for the  kmem_cache_node
 	 * structures first.  Without this, further allocations will bug.
 	 */
-	kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
-				kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
-				kmalloc_info[INDEX_NODE].size,
-				ARCH_KMALLOC_FLAGS, 0,
-				kmalloc_info[INDEX_NODE].size);
+	new_kmalloc_cache(INDEX_NODE, KMALLOC_NORMAL, ARCH_KMALLOC_FLAGS);
 	slab_state = PARTIAL_NODE;
 	setup_kmalloc_cache_index_table();
 
diff --git a/mm/slab.h b/mm/slab.h
index fd7ae2024897..e9238406602a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -283,6 +283,8 @@  int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
 struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
 			slab_flags_t flags, unsigned int useroffset,
 			unsigned int usersize);
+void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
+			      slab_flags_t flags);
 extern void create_boot_cache(struct kmem_cache *, const char *name,
 			unsigned int size, slab_flags_t flags,
 			unsigned int useroffset, unsigned int usersize);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6ee64d6208b3..594d8a8a68d0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -838,9 +838,18 @@  void __init setup_kmalloc_cache_index_table(void)
 	}
 }
 
-static void __init
+unsigned int __weak arch_kmalloc_minalign(void)
+{
+	return ARCH_KMALLOC_MINALIGN;
+}
+
+void __init
 new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
 {
+	unsigned int minalign = arch_kmalloc_minalign();
+	unsigned int aligned_size = kmalloc_info[idx].size;
+	int aligned_idx = idx;
+
 	if (type == KMALLOC_RECLAIM) {
 		flags |= SLAB_RECLAIM_ACCOUNT;
 	} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
@@ -851,10 +860,17 @@  new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
 		flags |= SLAB_ACCOUNT;
 	}
 
-	kmalloc_caches[type][idx] = create_kmalloc_cache(
-					kmalloc_info[idx].name[type],
-					kmalloc_info[idx].size, flags, 0,
-					kmalloc_info[idx].size);
+	if (minalign > ARCH_KMALLOC_MINALIGN) {
+		aligned_size = ALIGN(aligned_size, minalign);
+		aligned_idx = __kmalloc_index(aligned_size, false);
+	}
+
+	if (!kmalloc_caches[type][aligned_idx])
+		kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
+					kmalloc_info[aligned_idx].name[type],
+					aligned_size, flags, 0, aligned_size);
+	if (idx != aligned_idx)
+		kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
 
 	/*
 	 * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
@@ -904,11 +920,8 @@  void __init create_kmalloc_caches(slab_flags_t flags)
 		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
 
 		if (s) {
-			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
-				kmalloc_info[i].name[KMALLOC_DMA],
-				kmalloc_info[i].size,
-				SLAB_CACHE_DMA | flags, 0,
-				kmalloc_info[i].size);
+			new_kmalloc_cache(i, KMALLOC_DMA,
+					  SLAB_CACHE_DMA | flags);
 		}
 	}
 #endif