diff mbox series

[v3,3/4] memblock: introduce MEMBLOCK_RSRV_NOINIT_VMEMMAP flag

Message ID 20230825111836.1715308-4-usama.arif@bytedance.com (mailing list archive)
State New
Headers show
Series mm: hugetlb: Skip initialization of gigantic tail struct pages if freed by HVO | expand

Commit Message

Usama Arif Aug. 25, 2023, 11:18 a.m. UTC
For reserved memory regions marked with this flag,
reserve_bootmem_region() is not called during memmap_init_reserved_pages().
This can be used to avoid struct page initialization for
regions which won't need it, e.g. hugepages with
HVO enabled.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
---
 include/linux/memblock.h | 10 ++++++++++
 mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)

Comments

Muchun Song Aug. 28, 2023, 7:26 a.m. UTC | #1
> On Aug 25, 2023, at 19:18, Usama Arif <usama.arif@bytedance.com> wrote:
> 
> For reserved memory regions marked with this flag,
> reserve_bootmem_region is not called during memmap_init_reserved_pages.
> This can be used to avoid struct page initialization for
> regions which won't need them, for e.g. hugepages with
> HVO enabled.
> 
> Signed-off-by: Usama Arif <usama.arif@bytedance.com>

Reviewed-by: Muchun Song <songmuchun@bytedance.com>

One nit below.

> ---
> include/linux/memblock.h | 10 ++++++++++
> mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
> 2 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index f71ff9f0ec81..6d681d053880 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
>  * via a driver, and never indicated in the firmware-provided memory map as
>  * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
>  * kernel resource tree.
> + * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
> + * not initialized (only for reserved regions).

We have a more detailed explanation here.

>  */
> enum memblock_flags {
> 	MEMBLOCK_NONE = 0x0, /* No special request */
> @@ -47,6 +49,8 @@ enum memblock_flags {
> 	MEMBLOCK_MIRROR = 0x2, /* mirrored region */
> 	MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
> 	MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */
> +	/* don't initialize struct pages associated with this reserver memory block */

The comments right after the macros here seem to be brief explanations.
To keep this one consistent with the others, maybe "don't initialize struct pages"
is enough? At least, a detailed one is redundant and repetitive compared
with the one above.

> +	MEMBLOCK_RSRV_NOINIT_VMEMMAP = 0x10,
> };
Mike Rapoport Aug. 28, 2023, 7:47 a.m. UTC | #2
On Fri, Aug 25, 2023 at 12:18:35PM +0100, Usama Arif wrote:
> For reserved memory regions marked with this flag,
> reserve_bootmem_region is not called during memmap_init_reserved_pages.
> This can be used to avoid struct page initialization for
> regions which won't need them, for e.g. hugepages with
> HVO enabled.
> 
> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
> ---
>  include/linux/memblock.h | 10 ++++++++++
>  mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
>  2 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index f71ff9f0ec81..6d681d053880 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
>   * via a driver, and never indicated in the firmware-provided memory map as
>   * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
>   * kernel resource tree.
> + * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
> + * not initialized (only for reserved regions).
>   */
>  enum memblock_flags {
>  	MEMBLOCK_NONE		= 0x0,	/* No special request */
> @@ -47,6 +49,8 @@ enum memblock_flags {
>  	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
>  	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
>  	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
> +	/* don't initialize struct pages associated with this reserver memory block */
> +	MEMBLOCK_RSRV_NOINIT_VMEMMAP	= 0x10,

The flag means that struct pages shouldn't be initialized; it may be used
by more than just vmemmap optimizations.
Please drop _VMEMMAP.

And I agree with Muchun's remarks about the comments.



>  };
>  
>  /**
> @@ -125,6 +129,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
>  int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
>  int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
> +int memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size);

memblock does not care about vmemmap, please drop _vmemmap here and below as well.
  
>  void memblock_free_all(void);
>  void memblock_free(void *ptr, size_t size);
> @@ -259,6 +264,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
>  	return m->flags & MEMBLOCK_NOMAP;
>  }
>  
> +static inline bool memblock_is_noinit_vmemmap(struct memblock_region *m)

memblock_is_reserved_noinit please.

> +{
> +	return m->flags & MEMBLOCK_RSRV_NOINIT_VMEMMAP;
> +}
> +
>  static inline bool memblock_is_driver_managed(struct memblock_region *m)
>  {
>  	return m->flags & MEMBLOCK_DRIVER_MANAGED;
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 43cb4404d94c..a9782228c840 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -991,6 +991,23 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
>  	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
>  }
>  
> +/**
> + * memblock_reserved_mark_noinit_vmemmap - Mark a reserved memory region with flag
> + * MEMBLOCK_RSRV_NOINIT_VMEMMAP.

this should be about what marking RSRV_NOINIT does, not what flag it uses

> + * @base: the base phys addr of the region
> + * @size: the size of the region
> + *
> + * struct pages will not be initialized for reserved memory regions marked with
> + * %MEMBLOCK_RSRV_NOINIT_VMEMMAP.
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +int __init_memblock memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size)
> +{
> +	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
> +				    MEMBLOCK_RSRV_NOINIT_VMEMMAP);
> +}
> +
>  static bool should_skip_region(struct memblock_type *type,
>  			       struct memblock_region *m,
>  			       int nid, int flags)
> @@ -2107,13 +2124,18 @@ static void __init memmap_init_reserved_pages(void)
>  		memblock_set_node(start, end, &memblock.reserved, nid);
>  	}
>  
> -	/* initialize struct pages for the reserved regions */
> +	/*
> +	 * initialize struct pages for reserved regions that don't have
> +	 * the MEMBLOCK_RSRV_NOINIT_VMEMMAP flag set
> +	 */
>  	for_each_reserved_mem_region(region) {
> -		nid = memblock_get_region_node(region);
> -		start = region->base;
> -		end = start + region->size;
> +		if (!memblock_is_noinit_vmemmap(region)) {
> +			nid = memblock_get_region_node(region);
> +			start = region->base;
> +			end = start + region->size;
>  
> -		reserve_bootmem_region(start, end, nid);
> +			reserve_bootmem_region(start, end, nid);
> +		}
>  	}
>  }
>  
> -- 
> 2.25.1
>
Muchun Song Aug. 28, 2023, 8:52 a.m. UTC | #3
On 2023/8/28 15:47, Mike Rapoport wrote:
> On Fri, Aug 25, 2023 at 12:18:35PM +0100, Usama Arif wrote:
>> For reserved memory regions marked with this flag,
>> reserve_bootmem_region is not called during memmap_init_reserved_pages.
>> This can be used to avoid struct page initialization for
>> regions which won't need them, for e.g. hugepages with
>> HVO enabled.
>>
>> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
>> ---
>>   include/linux/memblock.h | 10 ++++++++++
>>   mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
>>   2 files changed, 37 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>> index f71ff9f0ec81..6d681d053880 100644
>> --- a/include/linux/memblock.h
>> +++ b/include/linux/memblock.h
>> @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
>>    * via a driver, and never indicated in the firmware-provided memory map as
>>    * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
>>    * kernel resource tree.
>> + * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
>> + * not initialized (only for reserved regions).
>>    */
>>   enum memblock_flags {
>>   	MEMBLOCK_NONE		= 0x0,	/* No special request */
>> @@ -47,6 +49,8 @@ enum memblock_flags {
>>   	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
>>   	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
>>   	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
>> +	/* don't initialize struct pages associated with this reserver memory block */
>> +	MEMBLOCK_RSRV_NOINIT_VMEMMAP	= 0x10,
> The flag means that struct page shouldn't be initialized, it may be used
> not only by vmemmap optimizations.
> Please drop _VMEMMAP.

The area where the struct pages are located is vmemmap. I think the
"vmemmap" suffix does not mean that it is for "vmemmap optimization";
it could specify the target which will not be initialized. For me,
MEMBLOCK_RSRV_NOINIT does not tell me what should not be initialized:
memblock itself or its struct pages (aka vmemmap pages)? So maybe
the suffix is better to keep?

>
> And I agree with Muchun's remarks about the comments.
>
>
>
>>   };
>>   
>>   /**
>> @@ -125,6 +129,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
>>   int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
>>   int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
>>   int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
>> +int memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size);
> memblock does not care about vmemmap, please drop _vmemmap here and below as well.
>    
>>   void memblock_free_all(void);
>>   void memblock_free(void *ptr, size_t size);
>> @@ -259,6 +264,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
>>   	return m->flags & MEMBLOCK_NOMAP;
>>   }
>>   
>> +static inline bool memblock_is_noinit_vmemmap(struct memblock_region *m)
> memblock_is_reserved_noinit please.
>
>> +{
>> +	return m->flags & MEMBLOCK_RSRV_NOINIT_VMEMMAP;
>> +}
>> +
>>   static inline bool memblock_is_driver_managed(struct memblock_region *m)
>>   {
>>   	return m->flags & MEMBLOCK_DRIVER_MANAGED;
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 43cb4404d94c..a9782228c840 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -991,6 +991,23 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
>>   	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
>>   }
>>   
>> +/**
>> + * memblock_reserved_mark_noinit_vmemmap - Mark a reserved memory region with flag
>> + * MEMBLOCK_RSRV_NOINIT_VMEMMAP.
> this should be about what marking RSRV_NOINIT does, not what flag it uses
>
>> + * @base: the base phys addr of the region
>> + * @size: the size of the region
>> + *
>> + * struct pages will not be initialized for reserved memory regions marked with
>> + * %MEMBLOCK_RSRV_NOINIT_VMEMMAP.
>> + *
>> + * Return: 0 on success, -errno on failure.
>> + */
>> +int __init_memblock memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size)
>> +{
>> +	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
>> +				    MEMBLOCK_RSRV_NOINIT_VMEMMAP);
>> +}
>> +
>>   static bool should_skip_region(struct memblock_type *type,
>>   			       struct memblock_region *m,
>>   			       int nid, int flags)
>> @@ -2107,13 +2124,18 @@ static void __init memmap_init_reserved_pages(void)
>>   		memblock_set_node(start, end, &memblock.reserved, nid);
>>   	}
>>   
>> -	/* initialize struct pages for the reserved regions */
>> +	/*
>> +	 * initialize struct pages for reserved regions that don't have
>> +	 * the MEMBLOCK_RSRV_NOINIT_VMEMMAP flag set
>> +	 */
>>   	for_each_reserved_mem_region(region) {
>> -		nid = memblock_get_region_node(region);
>> -		start = region->base;
>> -		end = start + region->size;
>> +		if (!memblock_is_noinit_vmemmap(region)) {
>> +			nid = memblock_get_region_node(region);
>> +			start = region->base;
>> +			end = start + region->size;
>>   
>> -		reserve_bootmem_region(start, end, nid);
>> +			reserve_bootmem_region(start, end, nid);
>> +		}
>>   	}
>>   }
>>   
>> -- 
>> 2.25.1
>>
Mike Rapoport Aug. 28, 2023, 9:09 a.m. UTC | #4
On Mon, Aug 28, 2023 at 04:52:10PM +0800, Muchun Song wrote:
> 
> 
> On 2023/8/28 15:47, Mike Rapoport wrote:
> > On Fri, Aug 25, 2023 at 12:18:35PM +0100, Usama Arif wrote:
> > > For reserved memory regions marked with this flag,
> > > reserve_bootmem_region is not called during memmap_init_reserved_pages.
> > > This can be used to avoid struct page initialization for
> > > regions which won't need them, for e.g. hugepages with
> > > HVO enabled.
> > > 
> > > Signed-off-by: Usama Arif <usama.arif@bytedance.com>
> > > ---
> > >   include/linux/memblock.h | 10 ++++++++++
> > >   mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
> > >   2 files changed, 37 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > > index f71ff9f0ec81..6d681d053880 100644
> > > --- a/include/linux/memblock.h
> > > +++ b/include/linux/memblock.h
> > > @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
> > >    * via a driver, and never indicated in the firmware-provided memory map as
> > >    * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
> > >    * kernel resource tree.
> > > + * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
> > > + * not initialized (only for reserved regions).
> > >    */
> > >   enum memblock_flags {
> > >   	MEMBLOCK_NONE		= 0x0,	/* No special request */
> > > @@ -47,6 +49,8 @@ enum memblock_flags {
> > >   	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
> > >   	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
> > >   	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
> > > +	/* don't initialize struct pages associated with this reserver memory block */
> > > +	MEMBLOCK_RSRV_NOINIT_VMEMMAP	= 0x10,
> > The flag means that struct page shouldn't be initialized, it may be used
> > not only by vmemmap optimizations.
> > Please drop _VMEMMAP.
> 
> The area at where the struct pages located is vmemmap, I think the
> "vmemap" suffix does not mean that it is for "vmemmap optimization",
> it could specify the target which will not be initialized. For me,
> MEMBLOCK_RSRV_NOINIT does not tell me what should not be initialized,
> memblock itself or its struct page (aka vmemmap pages)? So maybe
> the suffix is better to keep?

In the general case the area is memmap rather than vmemmap, so a better suffix
then would be _MEMMAP. I'm not too fond of that either, but I cannot think
of a better name.
 
> > 
> > And I agree with Muchun's remarks about the comments.
> > 
> > 
> > 
> > >   };
> > >   /**
> > > @@ -125,6 +129,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
> > >   int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
> > >   int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
> > >   int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
> > > +int memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size);
> > memblock does not care about vmemmap, please drop _vmemmap here and below as well.
> > >   void memblock_free_all(void);
> > >   void memblock_free(void *ptr, size_t size);
> > > @@ -259,6 +264,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
> > >   	return m->flags & MEMBLOCK_NOMAP;
> > >   }
> > > +static inline bool memblock_is_noinit_vmemmap(struct memblock_region *m)
> > memblock_is_reserved_noinit please.
> > 
> > > +{
> > > +	return m->flags & MEMBLOCK_RSRV_NOINIT_VMEMMAP;
> > > +}
> > > +
> > >   static inline bool memblock_is_driver_managed(struct memblock_region *m)
> > >   {
> > >   	return m->flags & MEMBLOCK_DRIVER_MANAGED;
> > > diff --git a/mm/memblock.c b/mm/memblock.c
> > > index 43cb4404d94c..a9782228c840 100644
> > > --- a/mm/memblock.c
> > > +++ b/mm/memblock.c
> > > @@ -991,6 +991,23 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
> > >   	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
> > >   }
> > > +/**
> > > + * memblock_reserved_mark_noinit_vmemmap - Mark a reserved memory region with flag
> > > + * MEMBLOCK_RSRV_NOINIT_VMEMMAP.
> > this should be about what marking RSRV_NOINIT does, not what flag it uses
> > 
> > > + * @base: the base phys addr of the region
> > > + * @size: the size of the region
> > > + *
> > > + * struct pages will not be initialized for reserved memory regions marked with
> > > + * %MEMBLOCK_RSRV_NOINIT_VMEMMAP.
> > > + *
> > > + * Return: 0 on success, -errno on failure.
> > > + */
> > > +int __init_memblock memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size)
> > > +{
> > > +	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
> > > +				    MEMBLOCK_RSRV_NOINIT_VMEMMAP);
> > > +}
> > > +
> > >   static bool should_skip_region(struct memblock_type *type,
> > >   			       struct memblock_region *m,
> > >   			       int nid, int flags)
> > > @@ -2107,13 +2124,18 @@ static void __init memmap_init_reserved_pages(void)
> > >   		memblock_set_node(start, end, &memblock.reserved, nid);
> > >   	}
> > > -	/* initialize struct pages for the reserved regions */
> > > +	/*
> > > +	 * initialize struct pages for reserved regions that don't have
> > > +	 * the MEMBLOCK_RSRV_NOINIT_VMEMMAP flag set
> > > +	 */
> > >   	for_each_reserved_mem_region(region) {
> > > -		nid = memblock_get_region_node(region);
> > > -		start = region->base;
> > > -		end = start + region->size;
> > > +		if (!memblock_is_noinit_vmemmap(region)) {
> > > +			nid = memblock_get_region_node(region);
> > > +			start = region->base;
> > > +			end = start + region->size;
> > > -		reserve_bootmem_region(start, end, nid);
> > > +			reserve_bootmem_region(start, end, nid);
> > > +		}
> > >   	}
> > >   }
> > > -- 
> > > 2.25.1
> > > 
>
Muchun Song Aug. 28, 2023, 9:18 a.m. UTC | #5
> On Aug 28, 2023, at 17:09, Mike Rapoport <rppt@kernel.org> wrote:
> 
> On Mon, Aug 28, 2023 at 04:52:10PM +0800, Muchun Song wrote:
>> 
>> 
>> On 2023/8/28 15:47, Mike Rapoport wrote:
>>> On Fri, Aug 25, 2023 at 12:18:35PM +0100, Usama Arif wrote:
>>>> For reserved memory regions marked with this flag,
>>>> reserve_bootmem_region is not called during memmap_init_reserved_pages.
>>>> This can be used to avoid struct page initialization for
>>>> regions which won't need them, for e.g. hugepages with
>>>> HVO enabled.
>>>> 
>>>> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
>>>> ---
>>>>  include/linux/memblock.h | 10 ++++++++++
>>>>  mm/memblock.c            | 32 +++++++++++++++++++++++++++-----
>>>>  2 files changed, 37 insertions(+), 5 deletions(-)
>>>> 
>>>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>>>> index f71ff9f0ec81..6d681d053880 100644
>>>> --- a/include/linux/memblock.h
>>>> +++ b/include/linux/memblock.h
>>>> @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
>>>>   * via a driver, and never indicated in the firmware-provided memory map as
>>>>   * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
>>>>   * kernel resource tree.
>>>> + * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
>>>> + * not initialized (only for reserved regions).
>>>>   */
>>>>  enum memblock_flags {
>>>>   MEMBLOCK_NONE = 0x0, /* No special request */
>>>> @@ -47,6 +49,8 @@ enum memblock_flags {
>>>>   MEMBLOCK_MIRROR = 0x2, /* mirrored region */
>>>>   MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
>>>>   MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */
>>>> + /* don't initialize struct pages associated with this reserver memory block */
>>>> + MEMBLOCK_RSRV_NOINIT_VMEMMAP = 0x10,
>>> The flag means that struct page shouldn't be initialized, it may be used
>>> not only by vmemmap optimizations.
>>> Please drop _VMEMMAP.
>> 
>> The area at where the struct pages located is vmemmap, I think the
>> "vmemap" suffix does not mean that it is for "vmemmap optimization",
>> it could specify the target which will not be initialized. For me,
>> MEMBLOCK_RSRV_NOINIT does not tell me what should not be initialized,
>> memblock itself or its struct page (aka vmemmap pages)? So maybe
>> the suffix is better to keep?
> In general case the area is memmap rather than vmemmap, so a better suffix

Right. memmap

> then would be _MEMMAP. I'm not too fond of that either, but I cannot think
> of better name.

I have no strong opinion. If we cannot think of a better name, just drop the
suffix as you suggested and make the comments more specific. :-)

Thanks.
diff mbox series

Patch

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f71ff9f0ec81..6d681d053880 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -40,6 +40,8 @@  extern unsigned long long max_possible_pfn;
  * via a driver, and never indicated in the firmware-provided memory map as
  * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
  * kernel resource tree.
+ * @MEMBLOCK_RSRV_NOINIT_VMEMMAP: memory region for which struct pages are
+ * not initialized (only for reserved regions).
  */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
@@ -47,6 +49,8 @@  enum memblock_flags {
 	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
 	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
+	/* don't initialize struct pages associated with this reserved memory block */
+	MEMBLOCK_RSRV_NOINIT_VMEMMAP	= 0x10,
 };
 
 /**
@@ -125,6 +129,7 @@  int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size);
 
 void memblock_free_all(void);
 void memblock_free(void *ptr, size_t size);
@@ -259,6 +264,11 @@  static inline bool memblock_is_nomap(struct memblock_region *m)
 	return m->flags & MEMBLOCK_NOMAP;
 }
 
+static inline bool memblock_is_noinit_vmemmap(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_RSRV_NOINIT_VMEMMAP;
+}
+
 static inline bool memblock_is_driver_managed(struct memblock_region *m)
 {
 	return m->flags & MEMBLOCK_DRIVER_MANAGED;
diff --git a/mm/memblock.c b/mm/memblock.c
index 43cb4404d94c..a9782228c840 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -991,6 +991,23 @@  int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
 	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
 }
 
+/**
+ * memblock_reserved_mark_noinit_vmemmap - Mark a reserved memory region with flag
+ * MEMBLOCK_RSRV_NOINIT_VMEMMAP.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * struct pages will not be initialized for reserved memory regions marked with
+ * %MEMBLOCK_RSRV_NOINIT_VMEMMAP.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_reserved_mark_noinit_vmemmap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
+				    MEMBLOCK_RSRV_NOINIT_VMEMMAP);
+}
+
 static bool should_skip_region(struct memblock_type *type,
 			       struct memblock_region *m,
 			       int nid, int flags)
@@ -2107,13 +2124,18 @@  static void __init memmap_init_reserved_pages(void)
 		memblock_set_node(start, end, &memblock.reserved, nid);
 	}
 
-	/* initialize struct pages for the reserved regions */
+	/*
+	 * initialize struct pages for reserved regions that don't have
+	 * the MEMBLOCK_RSRV_NOINIT_VMEMMAP flag set
+	 */
 	for_each_reserved_mem_region(region) {
-		nid = memblock_get_region_node(region);
-		start = region->base;
-		end = start + region->size;
+		if (!memblock_is_noinit_vmemmap(region)) {
+			nid = memblock_get_region_node(region);
+			start = region->base;
+			end = start + region->size;
 
-		reserve_bootmem_region(start, end, nid);
+			reserve_bootmem_region(start, end, nid);
+		}
 	}
 }