diff mbox series

memblock: update numa node of memblk reserved type

Message ID 20230523115708.195597-1-wangkefeng.wang@huawei.com (mailing list archive)
State New
Headers show
Series memblock: update numa node of memblk reserved type | expand

Commit Message

Kefeng Wang May 23, 2023, 11:57 a.m. UTC
The NUMA node recorded for memblock reserved regions is wrong. It can be
updated according to the NUMA node information of the memblock memory
regions, so let's fix it.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/memblock.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Comments

Anshuman Khandual May 24, 2023, 4:59 a.m. UTC | #1
On 5/23/23 17:27, Kefeng Wang wrote:
> The numa node of memblk reserved type is wrong, it could update
> according to the numa node information from memblk memory type,
> let's fix it.

Indeed it's wrong at present, and this can be verified from the debugfs file
(/sys/kernel/debug/memblock/reserved) accessed from user space.

> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  mm/memblock.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index a50447d970ef..45a0781cda31 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
>  	return memblock.current_limit;
>  }
>  
> +static void __init_memblock memblock_reserved_update_node(void)
> +{
> +	struct memblock_region *rgn;
> +	phys_addr_t base, end, size;
> +	int ret;
> +
> +	if (!IS_ENABLED(CONFIG_NUMA))
> +		return;
> +
> +	for_each_mem_region(rgn) {
> +		base = rgn->base;
> +		size = rgn->size;
> +		end = base + size - 1;
> +
> +		ret = memblock_set_node(base, size, &memblock.reserved,
> +					memblock_get_region_node(rgn));
> +		if (ret)
> +			pr_err("memblock: Failed to update reserved [%pa-%pa] node",
> +			       &base, &end);
> +	}
> +}
> +
>  static void __init_memblock memblock_dump(struct memblock_type *type)
>  {
>  	phys_addr_t base, end, size;
> @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void)
>  		&memblock.memory.total_size,
>  		&memblock.reserved.total_size);
>  
> +	memblock_reserved_update_node();

__memblock_dump_all() gets called only when memblock_debug is enabled.
This helper should be called directly inside memblock_dump_all() right
at the beginning, regardless of memblock_debug.

diff --git a/mm/memblock.c b/mm/memblock.c
index 804fae92d56f..008c4e86d7f3 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1954,7 +1954,6 @@ static void __init_memblock __memblock_dump_all(void)
                &memblock.memory.total_size,
                &memblock.reserved.total_size);
 
-       memblock_reserved_update_node();
        memblock_dump(&memblock.memory);
        memblock_dump(&memblock.reserved);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
@@ -1964,6 +1963,8 @@ static void __init_memblock __memblock_dump_all(void)
 
 void __init_memblock memblock_dump_all(void)
 {
+       memblock_reserved_update_node();
+
        if (memblock_debug)
                __memblock_dump_all();
 }

>  	memblock_dump(&memblock.memory);
>  	memblock_dump(&memblock.reserved);
>  #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
> @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private)
>  	unsigned int count = ARRAY_SIZE(flagname);
>  	phys_addr_t end;
>  
> +	memblock_reserved_update_node();
> +

This is redundant and should be dropped. Reserved memblock ranges need not
be scanned each time the debugfs file is accessed from user space.

>  	for (i = 0; i < type->cnt; i++) {
>  		reg = &type->regions[i];
>  		end = reg->base + reg->size - 1;
Kefeng Wang May 24, 2023, 6:47 a.m. UTC | #2
On 2023/5/24 12:59, Anshuman Khandual wrote:
> 
> 
> On 5/23/23 17:27, Kefeng Wang wrote:
>> The numa node of memblk reserved type is wrong, it could update
>> according to the numa node information from memblk memory type,
>> let's fix it.
> 
> Indeed it's wrong at present and can be verified from sysfs file
> (/sys/kernel/debug/memblock/reserved) accessed in user space.

Yes, both memblock_dump() and debugfs show the wrong value.
> 
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/memblock.c | 25 +++++++++++++++++++++++++
>>   1 file changed, 25 insertions(+)
>>
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index a50447d970ef..45a0781cda31 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
>>   	return memblock.current_limit;
>>   }
>>   
>> +static void __init_memblock memblock_reserved_update_node(void)
>> +{
>> +	struct memblock_region *rgn;
>> +	phys_addr_t base, end, size;
>> +	int ret;
>> +
>> +	if (!IS_ENABLED(CONFIG_NUMA))
>> +		return;
>> +
>> +	for_each_mem_region(rgn) {
>> +		base = rgn->base;
>> +		size = rgn->size;
>> +		end = base + size - 1;
>> +
>> +		ret = memblock_set_node(base, size, &memblock.reserved,
>> +					memblock_get_region_node(rgn));
>> +		if (ret)
>> +			pr_err("memblock: Failed to update reserved [%pa-%pa] node",
>> +			       &base, &end);
>> +	}
>> +}
>> +
>>   static void __init_memblock memblock_dump(struct memblock_type *type)
>>   {
>>   	phys_addr_t base, end, size;
>> @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void)
>>   		&memblock.memory.total_size,
>>   		&memblock.reserved.total_size);
>>   
>> +	memblock_reserved_update_node();
> 
> __memblock_dump_all() gets called only when memblock_debug is enabled.
> This helper should be called directly inside memblock_dump_all() right
> at the beginning, regardless of memblock_debug.

This was my first thought, but I found there are still many memblock_alloc
and memblock_reserve calls after memblock_dump_all(), so I update it twice:

1) __memblock_dump_all()
2) memblock_debug_show()

Without the above two interfaces, no one cares about the reserved node
info, so I put memblock_reserved_update_node() into __memblock_dump_all().


>>   #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
>> @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private)
>>   	unsigned int count = ARRAY_SIZE(flagname);
>>   	phys_addr_t end;
>>   
>> +	memblock_reserved_update_node();
>> +
> 

> This is redundant, should be dropped. Reserved memblock ranges need not
> be scanned, each time the sysfs file is accessed from user space.

Yes, it's better to move it into memblock_init_debugfs(),
which is only called once.
Mike Rapoport May 24, 2023, 3:33 p.m. UTC | #3
On Wed, May 24, 2023 at 02:47:26PM +0800, Kefeng Wang wrote:
> 
> On 2023/5/24 12:59, Anshuman Khandual wrote:
> > 
> > On 5/23/23 17:27, Kefeng Wang wrote:
> > > The numa node of memblk reserved type is wrong, it could update
> > > according to the numa node information from memblk memory type,
> > > let's fix it.
> > 
> > Indeed it's wrong at present and can be verified from sysfs file
> > (/sys/kernel/debug/memblock/reserved) accessed in user space.
> 
> Yes, both memblock_dump() and sysfs show wrong value.
> > 
> > > 
> > > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > > ---
> > >   mm/memblock.c | 25 +++++++++++++++++++++++++
> > >   1 file changed, 25 insertions(+)
> > > 
> > > diff --git a/mm/memblock.c b/mm/memblock.c
> > > index a50447d970ef..45a0781cda31 100644
> > > --- a/mm/memblock.c
> > > +++ b/mm/memblock.c
> > > @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
> > >   	return memblock.current_limit;
> > >   }
> > > +static void __init_memblock memblock_reserved_update_node(void)
> > > +{
> > > +	struct memblock_region *rgn;
> > > +	phys_addr_t base, end, size;
> > > +	int ret;
> > > +
> > > +	if (!IS_ENABLED(CONFIG_NUMA))
> > > +		return;
> > > +
> > > +	for_each_mem_region(rgn) {
> > > +		base = rgn->base;
> > > +		size = rgn->size;
> > > +		end = base + size - 1;
> > > +
> > > +		ret = memblock_set_node(base, size, &memblock.reserved,
> > > +					memblock_get_region_node(rgn));
> > > +		if (ret)
> > > +			pr_err("memblock: Failed to update reserved [%pa-%pa] node",
> > > +			       &base, &end);
> > > +	}
> > > +}
> > > +
> > >   static void __init_memblock memblock_dump(struct memblock_type *type)
> > >   {
> > >   	phys_addr_t base, end, size;
> > > @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void)
> > >   		&memblock.memory.total_size,
> > >   		&memblock.reserved.total_size);
> > > +	memblock_reserved_update_node();
> > 
> > __memblock_dump_all() gets called only when memblock_debug is enabled.
> > This helper should be called directly inside memblock_dump_all() right
> > at the beginning, regardless of memblock_debug.
> 
> This is my first though, but I found there are still many memblock_alloc and
> memblock_reserve after memblock_dump_all(), so I update it twice,
> 
> 1) __memblock_dump_all()
> 2) memblock_debug_show()
> 
> and without the above two interface, no one care about the reserved node
> info, so I put memblock_reserved_update_node into __memblock_dump_all().
 
We don't care about the reserved node info and __memblock_dump_all()
actually does not print node info for reserved regions unless somebody
explicitly sets the node id on a reserved memory.

So instead of updating reserved memory node info I'd rather avoid printing
it in debugfs.
 
> > >   #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
> > > @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private)
> > >   	unsigned int count = ARRAY_SIZE(flagname);
> > >   	phys_addr_t end;
> > > +	memblock_reserved_update_node();
> > > +
> > 
> 
> > This is redundant, should be dropped. Reserved memblock ranges need not
> > be scanned, each time the sysfs file is accessed from user space.
> 
> Yes, it's better to move it into memblock_init_debugfs(),
> which only called once.
> 
> 
>
Kefeng Wang May 25, 2023, 1:28 a.m. UTC | #4
On 2023/5/24 23:33, Mike Rapoport wrote:
> On Wed, May 24, 2023 at 02:47:26PM +0800, Kefeng Wang wrote:
>>
>> On 2023/5/24 12:59, Anshuman Khandual wrote:
>>>

>>>
>>> __memblock_dump_all() gets called only when memblock_debug is enabled.
>>> This helper should be called directly inside memblock_dump_all() right
>>> at the beginning, regardless of memblock_debug.
>>
>> This is my first though, but I found there are still many memblock_alloc and
>> memblock_reserve after memblock_dump_all(), so I update it twice,
>>
>> 1) __memblock_dump_all()
>> 2) memblock_debug_show()
>>
>> and without the above two interface, no one care about the reserved node
>> info, so I put memblock_reserved_update_node into __memblock_dump_all().
>   
> We don't care about the reserved node info and __memblock_dump_all()
> actually does not print node info for reserved regions unless somebody
> explicitly sets the node id on a reserved memory.
> 
> So instead of updating reserved memory node info I'd rather avoid printing
> it in debugfs.

OK, I will skip printing the node id when nid == MAX_NUMNODES in memblock_debug_show():

diff --git a/mm/memblock.c b/mm/memblock.c
index c5c80d9bcea3..e6033de1f76d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2169,17 +2169,19 @@ static int memblock_debug_show(struct seq_file *m, void *private)
  {
         struct memblock_type *type = m->private;
         struct memblock_region *reg;
-       int i, j;
+       int i, j, nid;
         unsigned int count = ARRAY_SIZE(flagname);
         phys_addr_t end;

         for (i = 0; i < type->cnt; i++) {
                 reg = &type->regions[i];
                 end = reg->base + reg->size - 1;
+               nid = memblock_get_region_node(reg);

                 seq_printf(m, "%4d: ", i);
                 seq_printf(m, "%pa..%pa ", &reg->base, &end);
-               seq_printf(m, "%4d ", memblock_get_region_node(reg));
+               if (nid != MAX_NUMNODES)
+                       seq_printf(m, "%4d ", nid);
                 if (reg->flags) {
                         for (j = 0; j < count; j++) {
                                 if (reg->flags & (1U << j)) {
diff mbox series

Patch

diff --git a/mm/memblock.c b/mm/memblock.c
index a50447d970ef..45a0781cda31 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1922,6 +1922,28 @@  phys_addr_t __init_memblock memblock_get_current_limit(void)
 	return memblock.current_limit;
 }
 
+static void __init_memblock memblock_reserved_update_node(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t base, end, size;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_NUMA))
+		return;
+
+	for_each_mem_region(rgn) {
+		base = rgn->base;
+		size = rgn->size;
+		end = base + size - 1;
+
+		ret = memblock_set_node(base, size, &memblock.reserved,
+					memblock_get_region_node(rgn));
+		if (ret)
+			pr_err("memblock: Failed to update reserved [%pa-%pa] node",
+			       &base, &end);
+	}
+}
+
 static void __init_memblock memblock_dump(struct memblock_type *type)
 {
 	phys_addr_t base, end, size;
@@ -1955,6 +1977,7 @@  static void __init_memblock __memblock_dump_all(void)
 		&memblock.memory.total_size,
 		&memblock.reserved.total_size);
 
+	memblock_reserved_update_node();
 	memblock_dump(&memblock.memory);
 	memblock_dump(&memblock.reserved);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
@@ -2196,6 +2219,8 @@  static int memblock_debug_show(struct seq_file *m, void *private)
 	unsigned int count = ARRAY_SIZE(flagname);
 	phys_addr_t end;
 
+	memblock_reserved_update_node();
+
 	for (i = 0; i < type->cnt; i++) {
 		reg = &type->regions[i];
 		end = reg->base + reg->size - 1;