[5/5] mm/memory-hotplug: Rework unregister_mem_sect_under_nodes
diff mbox series

Message ID 20181015153034.32203-6-osalvador@techadventures.net
State New
Headers show
Series
  • Do not touch pages/zones during hot-remove path
Related show

Commit Message

Oscar Salvador Oct. 15, 2018, 3:30 p.m. UTC
From: Oscar Salvador <osalvador@suse.de>

This tries to address another issue about accessing
unitiliazed pages.

Jonathan reported a problem [1] where we can access steal pages
in case we hot-remove memory without onlining it first.

This time is in unregister_mem_sect_under_nodes.
This function tries to get the nid from the pfn and then
tries to remove the symlink between mem_blk <-> nid and vice versa.

Since we already know the nid in remove_memory(), we can pass
it down the chain to unregister_mem_sect_under_nodes.
There we can just remove the symlinks without the need
to look into the pages.

[1] https://www.spinics.net/lists/linux-mm/msg161316.html

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 drivers/base/memory.c  |  9 ++++-----
 drivers/base/node.c    | 38 +++++++-------------------------------
 include/linux/memory.h |  2 +-
 include/linux/node.h   |  9 ++++-----
 mm/memory_hotplug.c    |  2 +-
 5 files changed, 17 insertions(+), 43 deletions(-)

Comments

Jonathan Cameron Oct. 18, 2018, 2:24 p.m. UTC | #1
On Mon, 15 Oct 2018 17:30:34 +0200
Oscar Salvador <osalvador@techadventures.net> wrote:

> From: Oscar Salvador <osalvador@suse.de>
> 
> This tries to address another issue about accessing
> unitiliazed pages.
> 
> Jonathan reported a problem [1] where we can access steal pages
> in case we hot-remove memory without onlining it first.
> 
> This time is in unregister_mem_sect_under_nodes.
> This function tries to get the nid from the pfn and then
> tries to remove the symlink between mem_blk <-> nid and vice versa.
> 
> Since we already know the nid in remove_memory(), we can pass
> it down the chain to unregister_mem_sect_under_nodes.
> There we can just remove the symlinks without the need
> to look into the pages.
> 
> [1] https://www.spinics.net/lists/linux-mm/msg161316.html
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>

Hi Oscar,

I've not gone through these in detail, but I have run some
quick smoke tests and all seems fine with the out of tree arm64
patches we are carrying (with the obvious changes to match
up with those for other architectures that you make in this set).

Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

In particular (as expected) it fixes the issue you mention above
so I can drop the hack I had in place to avoid that :)

Thanks,

Jonathan

> ---
>  drivers/base/memory.c  |  9 ++++-----
>  drivers/base/node.c    | 38 +++++++-------------------------------
>  include/linux/memory.h |  2 +-
>  include/linux/node.h   |  9 ++++-----
>  mm/memory_hotplug.c    |  2 +-
>  5 files changed, 17 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 0e5985682642..3d8c65d84bea 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -744,8 +744,7 @@ unregister_memory(struct memory_block *memory)
>  	device_unregister(&memory->dev);
>  }
>  
> -static int remove_memory_section(unsigned long node_id,
> -			       struct mem_section *section, int phys_device)
> +static int remove_memory_section(unsigned long nid, struct mem_section *section)
>  {
>  	struct memory_block *mem;
>  
> @@ -759,7 +758,7 @@ static int remove_memory_section(unsigned long node_id,
>  	if (!mem)
>  		goto out_unlock;
>  
> -	unregister_mem_sect_under_nodes(mem, __section_nr(section));
> +	unregister_mem_sect_under_nodes(nid, mem);
>  
>  	mem->section_count--;
>  	if (mem->section_count == 0)
> @@ -772,12 +771,12 @@ static int remove_memory_section(unsigned long node_id,
>  	return 0;
>  }
>  
> -int unregister_memory_section(struct mem_section *section)
> +int unregister_memory_section(int nid, struct mem_section *section)
>  {
>  	if (!present_section(section))
>  		return -EINVAL;
>  
> -	return remove_memory_section(0, section, 0);
> +	return remove_memory_section(nid, section);
>  }
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
>  
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 86d6cd92ce3d..65bc5920bd3d 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -453,40 +453,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
>  	return 0;
>  }
>  
> -/* unregister memory section under all nodes that it spans */
> -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
> -				    unsigned long phys_index)
> +/*
> + * This mem_blk is going to be removed, so let us remove the link
> + * to the node and vice versa
> + */
> +void unregister_mem_sect_under_nodes(int nid, struct memory_block *mem_blk)
>  {
> -	NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
> -	unsigned long pfn, sect_start_pfn, sect_end_pfn;
> -
> -	if (!mem_blk) {
> -		NODEMASK_FREE(unlinked_nodes);
> -		return -EFAULT;
> -	}
> -	if (!unlinked_nodes)
> -		return -ENOMEM;
> -	nodes_clear(*unlinked_nodes);
> -
> -	sect_start_pfn = section_nr_to_pfn(phys_index);
> -	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
> -	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
> -		int nid;
> -
> -		nid = get_nid_for_pfn(pfn);
> -		if (nid < 0)
> -			continue;
> -		if (!node_online(nid))
> -			continue;
> -		if (node_test_and_set(nid, *unlinked_nodes))
> -			continue;
> -		sysfs_remove_link(&node_devices[nid]->dev.kobj,
> +	sysfs_remove_link(&node_devices[nid]->dev.kobj,
>  			 kobject_name(&mem_blk->dev.kobj));
> -		sysfs_remove_link(&mem_blk->dev.kobj,
> +	sysfs_remove_link(&mem_blk->dev.kobj,
>  			 kobject_name(&node_devices[nid]->dev.kobj));
> -	}
> -	NODEMASK_FREE(unlinked_nodes);
> -	return 0;
>  }
>  
>  int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index a6ddefc60517..d75ec88ca09d 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -113,7 +113,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
>  extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
>  int hotplug_memory_register(int nid, struct mem_section *section);
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -extern int unregister_memory_section(struct mem_section *);
> +extern int unregister_memory_section(int nid, struct mem_section *);
>  #endif
>  extern int memory_dev_init(void);
>  extern int memory_notify(unsigned long val, void *v);
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 257bb3d6d014..dddead9937ab 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -72,8 +72,8 @@ extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
>  extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
>  extern int register_mem_sect_under_node(struct memory_block *mem_blk,
>  						void *arg);
> -extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
> -					   unsigned long phys_index);
> +extern void unregister_mem_sect_under_nodes(int nid,
> +						struct memory_block *mem_blk);
>  
>  #ifdef CONFIG_HUGETLBFS
>  extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
> @@ -105,10 +105,9 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
>  {
>  	return 0;
>  }
> -static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
> -						  unsigned long phys_index)
> +static inline void unregister_mem_sect_under_nodes(int nid,
> +						struct memory_block *mem_blk)
>  {
> -	return 0;
>  }
>  
>  static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 6b98321aa52f..66ccbb5b8a88 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -528,7 +528,7 @@ static int __remove_section(int nid, struct mem_section *ms,
>  	if (!valid_section(ms))
>  		return ret;
>  
> -	ret = unregister_memory_section(ms);
> +	ret = unregister_memory_section(nid, ms);
>  	if (ret)
>  		return ret;
>
Oscar Salvador Oct. 18, 2018, 3:02 p.m. UTC | #2
On Thu, Oct 18, 2018 at 03:24:34PM +0100, Jonathan Cameron wrote:
> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks a lot Jonathan for having tested it!

Did you test the whole serie or only this patch?
Since you have caught some bugs testing the memory-hotplug code
on ARM64, I wonder if you could test it with the whole serie
applied (if you got some free time, of course).


thanks again!
Jonathan Cameron Oct. 18, 2018, 3:38 p.m. UTC | #3
On Thu, 18 Oct 2018 17:02:21 +0200
Oscar Salvador <osalvador@techadventures.net> wrote:

> On Thu, Oct 18, 2018 at 03:24:34PM +0100, Jonathan Cameron wrote:
> > Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> 
> Thanks a lot Jonathan for having tested it!
> 
> Did you test the whole serie or only this patch?
> Since you have caught some bugs testing the memory-hotplug code
> on ARM64, I wonder if you could test it with the whole serie
> applied (if you got some free time, of course).
> 
> 
> thanks again!

Sorry I should have said.  Whole series on latest mmotm as of yesterday.
Obviously that only tested some of the code paths as I didn't test
hmm at all.

There are a few more quirks to chase down on my list, but nothing
related to this series and all superficial stuff.

I'm away from my boards (or the remote connection to them anyway) until
the 29th so any other tests will probably have to wait until then.

It's not clear if we'll take the actual arm64 support forwards but
hopefully someone will pick it up in the near future if we don't.
The complexity around pfn_valid on arm64 may take some time to unwind.

Thanks,

Jonathan

Patch
diff mbox series

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 0e5985682642..3d8c65d84bea 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -744,8 +744,7 @@  unregister_memory(struct memory_block *memory)
 	device_unregister(&memory->dev);
 }
 
-static int remove_memory_section(unsigned long node_id,
-			       struct mem_section *section, int phys_device)
+static int remove_memory_section(unsigned long nid, struct mem_section *section)
 {
 	struct memory_block *mem;
 
@@ -759,7 +758,7 @@  static int remove_memory_section(unsigned long node_id,
 	if (!mem)
 		goto out_unlock;
 
-	unregister_mem_sect_under_nodes(mem, __section_nr(section));
+	unregister_mem_sect_under_nodes(nid, mem);
 
 	mem->section_count--;
 	if (mem->section_count == 0)
@@ -772,12 +771,12 @@  static int remove_memory_section(unsigned long node_id,
 	return 0;
 }
 
-int unregister_memory_section(struct mem_section *section)
+int unregister_memory_section(int nid, struct mem_section *section)
 {
 	if (!present_section(section))
 		return -EINVAL;
 
-	return remove_memory_section(0, section, 0);
+	return remove_memory_section(nid, section);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 86d6cd92ce3d..65bc5920bd3d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -453,40 +453,16 @@  int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
 	return 0;
 }
 
-/* unregister memory section under all nodes that it spans */
-int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
-				    unsigned long phys_index)
+/*
+ * This mem_blk is going to be removed, so let us remove the link
+ * to the node and vice versa
+ */
+void unregister_mem_sect_under_nodes(int nid, struct memory_block *mem_blk)
 {
-	NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
-	unsigned long pfn, sect_start_pfn, sect_end_pfn;
-
-	if (!mem_blk) {
-		NODEMASK_FREE(unlinked_nodes);
-		return -EFAULT;
-	}
-	if (!unlinked_nodes)
-		return -ENOMEM;
-	nodes_clear(*unlinked_nodes);
-
-	sect_start_pfn = section_nr_to_pfn(phys_index);
-	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
-	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
-		int nid;
-
-		nid = get_nid_for_pfn(pfn);
-		if (nid < 0)
-			continue;
-		if (!node_online(nid))
-			continue;
-		if (node_test_and_set(nid, *unlinked_nodes))
-			continue;
-		sysfs_remove_link(&node_devices[nid]->dev.kobj,
+	sysfs_remove_link(&node_devices[nid]->dev.kobj,
 			 kobject_name(&mem_blk->dev.kobj));
-		sysfs_remove_link(&mem_blk->dev.kobj,
+	sysfs_remove_link(&mem_blk->dev.kobj,
 			 kobject_name(&node_devices[nid]->dev.kobj));
-	}
-	NODEMASK_FREE(unlinked_nodes);
-	return 0;
 }
 
 int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
diff --git a/include/linux/memory.h b/include/linux/memory.h
index a6ddefc60517..d75ec88ca09d 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -113,7 +113,7 @@  extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 int hotplug_memory_register(int nid, struct mem_section *section);
 #ifdef CONFIG_MEMORY_HOTREMOVE
-extern int unregister_memory_section(struct mem_section *);
+extern int unregister_memory_section(int nid, struct mem_section *);
 #endif
 extern int memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
diff --git a/include/linux/node.h b/include/linux/node.h
index 257bb3d6d014..dddead9937ab 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -72,8 +72,8 @@  extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
 						void *arg);
-extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
-					   unsigned long phys_index);
+extern void unregister_mem_sect_under_nodes(int nid,
+						struct memory_block *mem_blk);
 
 #ifdef CONFIG_HUGETLBFS
 extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
@@ -105,10 +105,9 @@  static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
 {
 	return 0;
 }
-static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
-						  unsigned long phys_index)
+static inline void unregister_mem_sect_under_nodes(int nid,
+						struct memory_block *mem_blk)
 {
-	return 0;
 }
 
 static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6b98321aa52f..66ccbb5b8a88 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -528,7 +528,7 @@  static int __remove_section(int nid, struct mem_section *ms,
 	if (!valid_section(ms))
 		return ret;
 
-	ret = unregister_memory_section(ms);
+	ret = unregister_memory_section(nid, ms);
 	if (ret)
 		return ret;