diff mbox series

[v5,2/3] mm/hotplug: make remove_memory() interface useable

Message ID 20190502184337.20538-3-pasha.tatashin@soleen.com (mailing list archive)
State New, archived
Headers show
Series "Hotremove" persistent memory | expand

Commit Message

Pasha Tatashin May 2, 2019, 6:43 p.m. UTC
As of right now remove_memory() interface is inherently broken. It tries
to remove memory but panics if some memory is not offline. The problem
is that it is impossible to ensure that all memory blocks are offline as
this function also takes lock_device_hotplug that is required to
change memory state via sysfs.

So, between calling this function and offlining all memory blocks there
is always a window when lock_device_hotplug is released, and therefore,
there is always a chance for a panic during this window.

Make this interface to return an error if memory removal fails. This way
it is safe to call this function without panicking machine, and also
makes it symmetric to add_memory() which already returns an error.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
---
 include/linux/memory_hotplug.h |  8 +++--
 mm/memory_hotplug.c            | 61 ++++++++++++++++++++++------------
 2 files changed, 46 insertions(+), 23 deletions(-)

Comments

David Hildenbrand May 3, 2019, 10:06 a.m. UTC | #1
On 02.05.19 20:43, Pavel Tatashin wrote:
> As of right now remove_memory() interface is inherently broken. It tries
> to remove memory but panics if some memory is not offline. The problem
> is that it is impossible to ensure that all memory blocks are offline as
> this function also takes lock_device_hotplug that is required to
> change memory state via sysfs.
> 

The existing interface can actually work today by registering a hotplug
notifier and rejecting any onlining attempts. But I agree that this way,
the interface becomes more usable.

> So, between calling this function and offlining all memory blocks there
> is always a window when lock_device_hotplug is released, and therefore,
> there is always a chance for a panic during this window.
> 
> Make this interface to return an error if memory removal fails. This way
> it is safe to call this function without panicking machine, and also
> makes it symmetric to add_memory() which already returns an error.
> 
> Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>> ---
>  include/linux/memory_hotplug.h |  8 +++--
>  mm/memory_hotplug.c            | 61 ++++++++++++++++++++++------------
>  2 files changed, 46 insertions(+), 23 deletions(-)
> 
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 8ade08c50d26..5438a2d92560 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -304,7 +304,7 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
>  extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
>  extern void try_offline_node(int nid);
>  extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
> -extern void remove_memory(int nid, u64 start, u64 size);
> +extern int remove_memory(int nid, u64 start, u64 size);
>  extern void __remove_memory(int nid, u64 start, u64 size);
>  
>  #else
> @@ -321,7 +321,11 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
>  	return -EINVAL;
>  }
>  
> -static inline void remove_memory(int nid, u64 start, u64 size) {}
> +static inline bool remove_memory(int nid, u64 start, u64 size)
> +{
> +	return -EBUSY;
> +}
> +
>  static inline void __remove_memory(int nid, u64 start, u64 size) {}
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
>  
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 8c454e82d4f6..a826aededa1a 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1778,9 +1778,10 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
>  		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
>  		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
>  			&beginpa, &endpa);
> -	}
>  
> -	return ret;
> +		return -EBUSY;
> +	}
> +	return 0;
>  }
>  
>  static int check_cpu_on_node(pg_data_t *pgdat)
> @@ -1843,19 +1844,9 @@ void try_offline_node(int nid)
>  }
>  EXPORT_SYMBOL(try_offline_node);
>  
> -/**
> - * remove_memory
> - * @nid: the node ID
> - * @start: physical address of the region to remove
> - * @size: size of the region to remove
> - *
> - * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> - * and online/offline operations before this call, as required by
> - * try_offline_node().
> - */
> -void __ref __remove_memory(int nid, u64 start, u64 size)
> +static int __ref try_remove_memory(int nid, u64 start, u64 size)
>  {
> -	int ret;
> +	int rc = 0;
>  
>  	BUG_ON(check_hotplug_memory_range(start, size));
>  
> @@ -1863,13 +1854,13 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
>  
>  	/*
>  	 * All memory blocks must be offlined before removing memory.  Check
> -	 * whether all memory blocks in question are offline and trigger a BUG()
> +	 * whether all memory blocks in question are offline and return error
>  	 * if this is not the case.
>  	 */
> -	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
> -				check_memblock_offlined_cb);
> -	if (ret)
> -		BUG();
> +	rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
> +			       check_memblock_offlined_cb);
> +	if (rc)
> +		goto done;
>  
>  	/* remove memmap entry */
>  	firmware_map_remove(start, start + size, "System RAM");
> @@ -1879,14 +1870,42 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
>  
>  	try_offline_node(nid);
>  
> +done:
>  	mem_hotplug_done();
> +	return rc;
>  }
>  
> -void remove_memory(int nid, u64 start, u64 size)
> +/**
> + * remove_memory
> + * @nid: the node ID
> + * @start: physical address of the region to remove
> + * @size: size of the region to remove
> + *
> + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> + * and online/offline operations before this call, as required by
> + * try_offline_node().
> + */
> +void __remove_memory(int nid, u64 start, u64 size)
>  {
> +
> +	/*
> +	 * trigger BUG() is some memory is not offlined prior to calling this
> +	 * function
> +	 */
> +	if (try_remove_memory(nid, start, size))
> +		BUG();
> +}
> +
> +/* Remove memory if every memory block is offline, otherwise return false */

Comment is wrong "return false"

> +int remove_memory(int nid, u64 start, u64 size)
> +{
> +	int rc;
> +
>  	lock_device_hotplug();
> -	__remove_memory(nid, start, size);
> +	rc  = try_remove_memory(nid, start, size);
>  	unlock_device_hotplug();
> +
> +	return rc;
>  }
>  EXPORT_SYMBOL_GPL(remove_memory);
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
> 


Looks sane to me

Reviewed-by: David Hildenbrand <david@redhat.com>
Dave Hansen May 6, 2019, 5:57 p.m. UTC | #2
> -static inline void remove_memory(int nid, u64 start, u64 size) {}
> +static inline bool remove_memory(int nid, u64 start, u64 size)
> +{
> +	return -EBUSY;
> +}

This seems like an appropriate place for a WARN_ONCE(), if someone
manages to call remove_memory() with hotplug disabled.

BTW, I looked and can't think of a better errno, but -EBUSY probably
isn't the best error code, right?

> -void remove_memory(int nid, u64 start, u64 size)
> +/**
> + * remove_memory
> + * @nid: the node ID
> + * @start: physical address of the region to remove
> + * @size: size of the region to remove
> + *
> + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> + * and online/offline operations before this call, as required by
> + * try_offline_node().
> + */
> +void __remove_memory(int nid, u64 start, u64 size)
>  {
> +
> +	/*
> +	 * trigger BUG() is some memory is not offlined prior to calling this
> +	 * function
> +	 */
> +	if (try_remove_memory(nid, start, size))
> +		BUG();
> +}

Could we call this remove_offline_memory()?  That way, it makes _some_
sense why we would BUG() if the memory isn't offline.
Dan Williams May 6, 2019, 6:01 p.m. UTC | #3
On Mon, May 6, 2019 at 10:57 AM Dave Hansen <dave.hansen@intel.com> wrote:
>
> > -static inline void remove_memory(int nid, u64 start, u64 size) {}
> > +static inline bool remove_memory(int nid, u64 start, u64 size)
> > +{
> > +     return -EBUSY;
> > +}
>
> This seems like an appropriate place for a WARN_ONCE(), if someone
> manages to call remove_memory() with hotplug disabled.
>
> BTW, I looked and can't think of a better errno, but -EBUSY probably
> isn't the best error code, right?
>
> > -void remove_memory(int nid, u64 start, u64 size)
> > +/**
> > + * remove_memory
> > + * @nid: the node ID
> > + * @start: physical address of the region to remove
> > + * @size: size of the region to remove
> > + *
> > + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> > + * and online/offline operations before this call, as required by
> > + * try_offline_node().
> > + */
> > +void __remove_memory(int nid, u64 start, u64 size)
> >  {
> > +
> > +     /*
> > +      * trigger BUG() is some memory is not offlined prior to calling this
> > +      * function
> > +      */
> > +     if (try_remove_memory(nid, start, size))
> > +             BUG();
> > +}
>
> Could we call this remove_offline_memory()?  That way, it makes _some_
> sense why we would BUG() if the memory isn't offline.

Please WARN() instead of BUG() because failing to remove memory should
not be system fatal.
Dave Hansen May 6, 2019, 6:04 p.m. UTC | #4
On 5/6/19 11:01 AM, Dan Williams wrote:
>>> +void __remove_memory(int nid, u64 start, u64 size)
>>>  {
>>> +
>>> +     /*
>>> +      * trigger BUG() is some memory is not offlined prior to calling this
>>> +      * function
>>> +      */
>>> +     if (try_remove_memory(nid, start, size))
>>> +             BUG();
>>> +}
>> Could we call this remove_offline_memory()?  That way, it makes _some_
>> sense why we would BUG() if the memory isn't offline.
> Please WARN() instead of BUG() because failing to remove memory should
> not be system fatal.

That is my preference as well.  But, the existing code BUG()s, so I'm
OK-ish with this staying for the moment until we have a better handle on
what all the callers do if this fails.
Pasha Tatashin May 6, 2019, 6:13 p.m. UTC | #5
On Mon, May 6, 2019 at 1:57 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> > -static inline void remove_memory(int nid, u64 start, u64 size) {}
> > +static inline bool remove_memory(int nid, u64 start, u64 size)
> > +{
> > +     return -EBUSY;
> > +}
>
> This seems like an appropriate place for a WARN_ONCE(), if someone
> manages to call remove_memory() with hotplug disabled.
>
> BTW, I looked and can't think of a better errno, but -EBUSY probably
> isn't the best error code, right?

Same here, I looked and did not find any better then -EBUSY. Also, it
is close to check_cpu_on_node() in the same file.

>
> > -void remove_memory(int nid, u64 start, u64 size)
> > +/**
> > + * remove_memory
> > + * @nid: the node ID
> > + * @start: physical address of the region to remove
> > + * @size: size of the region to remove
> > + *
> > + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> > + * and online/offline operations before this call, as required by
> > + * try_offline_node().
> > + */
> > +void __remove_memory(int nid, u64 start, u64 size)
> >  {
> > +
> > +     /*
> > +      * trigger BUG() is some memory is not offlined prior to calling this
> > +      * function
> > +      */
> > +     if (try_remove_memory(nid, start, size))
> > +             BUG();
> > +}
>
> Could we call this remove_offline_memory()?  That way, it makes _some_
> sense why we would BUG() if the memory isn't offline.

Sure, I will rename this function.

Thank you,
Pasha
Pasha Tatashin May 6, 2019, 6:18 p.m. UTC | #6
On Mon, May 6, 2019 at 2:04 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 5/6/19 11:01 AM, Dan Williams wrote:
> >>> +void __remove_memory(int nid, u64 start, u64 size)
> >>>  {
> >>> +
> >>> +     /*
> >>> +      * trigger BUG() is some memory is not offlined prior to calling this
> >>> +      * function
> >>> +      */
> >>> +     if (try_remove_memory(nid, start, size))
> >>> +             BUG();
> >>> +}
> >> Could we call this remove_offline_memory()?  That way, it makes _some_
> >> sense why we would BUG() if the memory isn't offline.
> > Please WARN() instead of BUG() because failing to remove memory should
> > not be system fatal.
>
> That is my preference as well.  But, the existing code BUG()s, so I'm
> OK-ish with this staying for the moment until we have a better handle on
> what all the callers do if this fails.

Yes, this is the reason why I BUG() here. The current code does this,
and I was not sure what would happen if we simply continue executing.
Of course, I would prefer to return failure, so the callers can act
appropriately, but let's make one thing at a time, this should not be
part of this series.

Thank you,
Pasha
Pasha Tatashin May 17, 2019, 6:10 p.m. UTC | #7
Hi Dan,

Thank you very much for your review, my comments below:

On Mon, May 6, 2019 at 2:01 PM Dan Williams <dan.j.williams@intel.com> wrote:
>
> On Mon, May 6, 2019 at 10:57 AM Dave Hansen <dave.hansen@intel.com> wrote:
> >
> > > -static inline void remove_memory(int nid, u64 start, u64 size) {}
> > > +static inline bool remove_memory(int nid, u64 start, u64 size)
> > > +{
> > > +     return -EBUSY;
> > > +}
> >
> > This seems like an appropriate place for a WARN_ONCE(), if someone
> > manages to call remove_memory() with hotplug disabled.

I decided not to do WARN_ONCE(), in all likelihood compiler will
simply optimize this function out, but with WARN_ONCE() some traces of
it will remain.

> >
> > BTW, I looked and can't think of a better errno, but -EBUSY probably
> > isn't the best error code, right?

-EBUSY is the only error that is returned in case of error by real
remove_memory(), so I think it is OK to keep it here.

> >
> > > -void remove_memory(int nid, u64 start, u64 size)
> > > +/**
> > > + * remove_memory
> > > + * @nid: the node ID
> > > + * @start: physical address of the region to remove
> > > + * @size: size of the region to remove
> > > + *
> > > + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> > > + * and online/offline operations before this call, as required by
> > > + * try_offline_node().
> > > + */
> > > +void __remove_memory(int nid, u64 start, u64 size)
> > >  {
> > > +
> > > +     /*
> > > +      * trigger BUG() is some memory is not offlined prior to calling this
> > > +      * function
> > > +      */
> > > +     if (try_remove_memory(nid, start, size))
> > > +             BUG();
> > > +}
> >
> > Could we call this remove_offline_memory()?  That way, it makes _some_
> > sense why we would BUG() if the memory isn't offline.

It is this particular code path, the second one: remove_memory(),
actually tries to remove memory and returns failure if it can't. So, I
think the current name is OK.

>
> Please WARN() instead of BUG() because failing to remove memory should
> not be system fatal.

As mentioned earlier, I will keep BUG(), because existing code does
that, and there is no good handling of this code to return on error.

Thank you,
Pavel
diff mbox series

Patch

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8ade08c50d26..5438a2d92560 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -304,7 +304,7 @@  static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
 extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
 extern void try_offline_node(int nid);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
-extern void remove_memory(int nid, u64 start, u64 size);
+extern int remove_memory(int nid, u64 start, u64 size);
 extern void __remove_memory(int nid, u64 start, u64 size);
 
 #else
@@ -321,7 +321,11 @@  static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 	return -EINVAL;
 }
 
-static inline void remove_memory(int nid, u64 start, u64 size) {}
+static inline bool remove_memory(int nid, u64 start, u64 size)
+{
+	return -EBUSY;
+}
+
 static inline void __remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8c454e82d4f6..a826aededa1a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1778,9 +1778,10 @@  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
 		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
 			&beginpa, &endpa);
-	}
 
-	return ret;
+		return -EBUSY;
+	}
+	return 0;
 }
 
 static int check_cpu_on_node(pg_data_t *pgdat)
@@ -1843,19 +1844,9 @@  void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
-/**
- * remove_memory
- * @nid: the node ID
- * @start: physical address of the region to remove
- * @size: size of the region to remove
- *
- * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
- * and online/offline operations before this call, as required by
- * try_offline_node().
- */
-void __ref __remove_memory(int nid, u64 start, u64 size)
+static int __ref try_remove_memory(int nid, u64 start, u64 size)
 {
-	int ret;
+	int rc = 0;
 
 	BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -1863,13 +1854,13 @@  void __ref __remove_memory(int nid, u64 start, u64 size)
 
 	/*
 	 * All memory blocks must be offlined before removing memory.  Check
-	 * whether all memory blocks in question are offline and trigger a BUG()
+	 * whether all memory blocks in question are offline and return error
 	 * if this is not the case.
 	 */
-	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
-				check_memblock_offlined_cb);
-	if (ret)
-		BUG();
+	rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
+			       check_memblock_offlined_cb);
+	if (rc)
+		goto done;
 
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
@@ -1879,14 +1870,42 @@  void __ref __remove_memory(int nid, u64 start, u64 size)
 
 	try_offline_node(nid);
 
+done:
 	mem_hotplug_done();
+	return rc;
 }
 
-void remove_memory(int nid, u64 start, u64 size)
+/**
+ * remove_memory
+ * @nid: the node ID
+ * @start: physical address of the region to remove
+ * @size: size of the region to remove
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
+void __remove_memory(int nid, u64 start, u64 size)
 {
+
+	/*
+	 * trigger BUG() is some memory is not offlined prior to calling this
+	 * function
+	 */
+	if (try_remove_memory(nid, start, size))
+		BUG();
+}
+
+/* Remove memory if every memory block is offline, otherwise return false */
+int remove_memory(int nid, u64 start, u64 size)
+{
+	int rc;
+
 	lock_device_hotplug();
-	__remove_memory(nid, start, size);
+	rc  = try_remove_memory(nid, start, size);
 	unlock_device_hotplug();
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */