diff mbox

[v5,14/14] memory-hotplug: free node_data when a node is offlined

Message ID 1356350964-13437-15-git-send-email-tangchen@cn.fujitsu.com (mailing list archive)
State Superseded
Headers show

Commit Message

tangchen Dec. 24, 2012, 12:09 p.m. UTC
From: Wen Congyang <wency@cn.fujitsu.com>

We call hotadd_new_pgdat() to allocate memory to store node_data. So we
should free it when removing a node.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 mm/memory_hotplug.c |   20 +++++++++++++++++++-
 1 files changed, 19 insertions(+), 1 deletions(-)

Comments

KAMEZAWA Hiroyuki Dec. 26, 2012, 3:55 a.m. UTC | #1
(2012/12/24 21:09), Tang Chen wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>
> 
> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
> should free it when removing a node.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>

I'm sorry, but is it safe to remove pgdat? Are all zone caches and zonelists
properly cleared/rebuilt in a synchronous way? And are no threads visiting
zones in vmscan.c?

Thanks,
-Kame

> ---
>   mm/memory_hotplug.c |   20 +++++++++++++++++++-
>   1 files changed, 19 insertions(+), 1 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index f8a1d2f..447fa24 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1680,9 +1680,12 @@ static int check_cpu_on_node(void *data)
>   /* offline the node if all memory sections of this node are removed */
>   static void try_offline_node(int nid)
>   {
> +	pg_data_t *pgdat = NODE_DATA(nid);
>   	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
> -	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
> +	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
>   	unsigned long pfn;
> +	struct page *pgdat_page = virt_to_page(pgdat);
> +	int i;
>   
>   	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>   		unsigned long section_nr = pfn_to_section_nr(pfn);
> @@ -1709,6 +1712,21 @@ static void try_offline_node(int nid)
>   	 */
>   	node_set_offline(nid);
>   	unregister_one_node(nid);
> +
> +	if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
> +		/* node data is allocated from boot memory */
> +		return;
> +
> +	/* free waittable in each zone */
> +	for (i = 0; i < MAX_NR_ZONES; i++) {
> +		struct zone *zone = pgdat->node_zones + i;
> +
> +		if (zone->wait_table)
> +			vfree(zone->wait_table);
> +	}
> +
> +	arch_refresh_nodedata(nid, NULL);
> +	arch_free_nodedata(pgdat);
>   }
>   
>   int __ref remove_memory(int nid, u64 start, u64 size)
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wen Congyang Dec. 27, 2012, 12:16 p.m. UTC | #2
At 12/26/2012 11:55 AM, Kamezawa Hiroyuki Wrote:
> (2012/12/24 21:09), Tang Chen wrote:
>> From: Wen Congyang <wency@cn.fujitsu.com>
>>
>> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
>> should free it when removing a node.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> 
> I'm sorry, but is it safe to remove pgdat? Are all zone caches and zonelists
> properly cleared/rebuilt in a synchronous way? And are no threads visiting
> zones in vmscan.c?

We have rebuilt zonelists when a zone has no memory after offlining some pages.

Thanks
Wen Congyang

> 
> Thanks,
> -Kame
> 
>> ---
>>   mm/memory_hotplug.c |   20 +++++++++++++++++++-
>>   1 files changed, 19 insertions(+), 1 deletions(-)
>>
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index f8a1d2f..447fa24 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1680,9 +1680,12 @@ static int check_cpu_on_node(void *data)
>>   /* offline the node if all memory sections of this node are removed */
>>   static void try_offline_node(int nid)
>>   {
>> +	pg_data_t *pgdat = NODE_DATA(nid);
>>   	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
>> -	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
>> +	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
>>   	unsigned long pfn;
>> +	struct page *pgdat_page = virt_to_page(pgdat);
>> +	int i;
>>   
>>   	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>>   		unsigned long section_nr = pfn_to_section_nr(pfn);
>> @@ -1709,6 +1712,21 @@ static void try_offline_node(int nid)
>>   	 */
>>   	node_set_offline(nid);
>>   	unregister_one_node(nid);
>> +
>> +	if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
>> +		/* node data is allocated from boot memory */
>> +		return;
>> +
>> +	/* free waittable in each zone */
>> +	for (i = 0; i < MAX_NR_ZONES; i++) {
>> +		struct zone *zone = pgdat->node_zones + i;
>> +
>> +		if (zone->wait_table)
>> +			vfree(zone->wait_table);
>> +	}
>> +
>> +	arch_refresh_nodedata(nid, NULL);
>> +	arch_free_nodedata(pgdat);
>>   }
>>   
>>   int __ref remove_memory(int nid, u64 start, u64 size)
>>
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
KAMEZAWA Hiroyuki Dec. 28, 2012, 12:28 a.m. UTC | #3
(2012/12/27 21:16), Wen Congyang wrote:
> At 12/26/2012 11:55 AM, Kamezawa Hiroyuki Wrote:
>> (2012/12/24 21:09), Tang Chen wrote:
>>> From: Wen Congyang <wency@cn.fujitsu.com>
>>>
>>> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
>>> should free it when removing a node.
>>>
>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>
>> I'm sorry, but is it safe to remove pgdat? Are all zone caches and zonelists
>> properly cleared/rebuilt in a synchronous way? And are no threads visiting
>> zones in vmscan.c?
> 
> We have rebuilt zonelists when a zone has no memory after offlining some pages.
> 

How do you guarantee that the address of a pgdat/zone is not on the stack of any kernel
thread, or held in other kernel objects, without reference counting or another synchronization method?


Thanks,
-Kame


--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wen Congyang Dec. 30, 2012, 6:02 a.m. UTC | #4
At 12/28/2012 08:28 AM, Kamezawa Hiroyuki Wrote:
> (2012/12/27 21:16), Wen Congyang wrote:
>> At 12/26/2012 11:55 AM, Kamezawa Hiroyuki Wrote:
>>> (2012/12/24 21:09), Tang Chen wrote:
>>>> From: Wen Congyang <wency@cn.fujitsu.com>
>>>>
>>>> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
>>>> should free it when removing a node.
>>>>
>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>
>>> I'm sorry, but is it safe to remove pgdat? Are all zone caches and zonelists
>>> properly cleared/rebuilt in a synchronous way? And are no threads visiting
>>> zones in vmscan.c?
>>
>> We have rebuilt zonelists when a zone has no memory after offlining some pages.
>>
> 
> How do you guarantee that the address of a pgdat/zone is not on the stack of any kernel
> thread, or held in other kernel objects, without reference counting or another synchronization method?

There is no way to guarantee this. But the kernel should not use the address of a pgdat/zone
when it is offlined.

Hmm, what about this: reuse the memory when the node is onlined again?

Thanks
Wen Congyang

> 
> 
> Thanks,
> -Kame
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
KAMEZAWA Hiroyuki Jan. 7, 2013, 5:30 a.m. UTC | #5
(2012/12/30 15:02), Wen Congyang wrote:
> At 12/28/2012 08:28 AM, Kamezawa Hiroyuki Wrote:
>> (2012/12/27 21:16), Wen Congyang wrote:
>>> At 12/26/2012 11:55 AM, Kamezawa Hiroyuki Wrote:
>>>> (2012/12/24 21:09), Tang Chen wrote:
>>>>> From: Wen Congyang <wency@cn.fujitsu.com>
>>>>>
>>>>> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
>>>>> should free it when removing a node.
>>>>>
>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>>
>>>> I'm sorry, but is it safe to remove pgdat? Are all zone caches and zonelists
>>>> properly cleared/rebuilt in a synchronous way? And are no threads visiting
>>>> zones in vmscan.c?
>>>
>>> We have rebuilt zonelists when a zone has no memory after offlining some pages.
>>>
>>
>> How do you guarantee that the address of a pgdat/zone is not on the stack of any kernel
>> thread, or held in other kernel objects, without reference counting or another synchronization method?
> 
> There is no way to guarantee this. But the kernel should not use the address of a pgdat/zone
> when it is offlined.
> 
> Hmm, what about this: reuse the memory when the node is onlined again?
> 

That's the only way which we can go now. Please don't free it.

Thanks,
-Kame


--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f8a1d2f..447fa24 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1680,9 +1680,12 @@  static int check_cpu_on_node(void *data)
 /* offline the node if all memory sections of this node are removed */
 static void try_offline_node(int nid)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
-	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
+	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
 	unsigned long pfn;
+	struct page *pgdat_page = virt_to_page(pgdat);
+	int i;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 		unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -1709,6 +1712,21 @@  static void try_offline_node(int nid)
 	 */
 	node_set_offline(nid);
 	unregister_one_node(nid);
+
+	if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
+		/* node data is allocated from boot memory */
+		return;
+
+	/* free waittable in each zone */
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		struct zone *zone = pgdat->node_zones + i;
+
+		if (zone->wait_table)
+			vfree(zone->wait_table);
+	}
+
+	arch_refresh_nodedata(nid, NULL);
+	arch_free_nodedata(pgdat);
 }
 
 int __ref remove_memory(int nid, u64 start, u64 size)