diff mbox series

[v6,1/4] mm,hwpoison,hugetlb,memory_hotplug: hotremove memory section with hwpoisoned hugepage

Message ID 20221007010706.2916472-2-naoya.horiguchi@linux.dev (mailing list archive)
State New
Headers show
Series mm, hwpoison: improve handling workload related to hugetlb and memory_hotplug | expand

Commit Message

Naoya Horiguchi Oct. 7, 2022, 1:07 a.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

HWPoisoned page is not supposed to be accessed once marked, but currently
such accesses can happen during memory hotremove because do_migrate_range()
can be called before dissolve_free_huge_pages() is called.

Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
migrated.  This should be done in hugetlb_lock to avoid race against
isolate_hugetlb().

Once HPageMigratable is cleared, get_hwpoison_huge_page() would return
-EBUSY for a hwpoisoned hugepage.  Add an "unpoison" flag to it so that the
unpoison path can still take a refcount on hwpoisoned hugepages.

Reported-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
ChangeLog v3 -> v6:
- introduce migratable_cleared to remember that HPageMigratable is
  cleared in error handling.  It's needed to cancel when an error event
  is filtered by hwpoison_filter(). (Thanks to Miaohe)

ChangeLog v2 -> v3
- move to the approach of clearing HPageMigratable instead of shifting
  dissolve_free_huge_pages.
---
 include/linux/hugetlb.h | 10 ++++++----
 include/linux/mm.h      |  6 ++++--
 mm/hugetlb.c            |  9 +++++----
 mm/memory-failure.c     | 21 +++++++++++++++++----
 4 files changed, 32 insertions(+), 14 deletions(-)

Comments

Oscar Salvador Oct. 13, 2022, 2:17 p.m. UTC | #1
On Fri, Oct 07, 2022 at 10:07:03AM +0900, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> HWPoisoned page is not supposed to be accessed once marked, but currently
> such accesses can happen during memory hotremove because do_migrate_range()
> can be called before dissolve_free_huge_pages() is called.
> 
> Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
> migrated.  This should be done in hugetlb_lock to avoid race against
> isolate_hugetlb().
> 
> get_hwpoison_huge_page() needs to have a flag to show it's called from
> unpoison to take refcount of hwpoisoned hugepages, so add it.
> 
> Reported-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

I could not spot any red flags:

Reviewed-by: Oscar Salvador <osalvador@suse.de>

> ---
> ChangeLog v3 -> v6:
> - introduce migratable_cleared to remember that HPageMigratable is
>   cleared in error handling.  It's needed to cancel when an error event
>   is filtered by hwpoison_filter(). (Thanks to Miaohe)
> 
> ChangeLog v2 -> v3
> - move to the approach of clearing HPageMigratable instead of shifting
>   dissolve_free_huge_pages.
> ---
>  include/linux/hugetlb.h | 10 ++++++----
>  include/linux/mm.h      |  6 ++++--
>  mm/hugetlb.c            |  9 +++++----
>  mm/memory-failure.c     | 21 +++++++++++++++++----
>  4 files changed, 32 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 32d45e96a894..19b99ff7fea0 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -183,8 +183,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
>  long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
>  						long freed);
>  int isolate_hugetlb(struct page *page, struct list_head *list);
> -int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
> +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
> +int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				bool *migratable_cleared);
>  void putback_active_hugepage(struct page *page);
>  void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
>  void free_huge_page(struct page *page);
> @@ -391,12 +392,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list)
>  	return -EBUSY;
>  }
>  
> -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
> +static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
>  {
>  	return 0;
>  }
>  
> -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared)
>  {
>  	return 0;
>  }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 8bbcccbc5565..3264bf993ad8 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3277,9 +3277,11 @@ extern void shake_page(struct page *p);
>  extern atomic_long_t num_poisoned_pages __read_mostly;
>  extern int soft_offline_page(unsigned long pfn, int flags);
>  #ifdef CONFIG_MEMORY_FAILURE
> -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
> +extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared);
>  #else
> -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared)
>  {
>  	return 0;
>  }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 63fe47a0240a..0e482dfaf92e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7253,7 +7253,7 @@ int isolate_hugetlb(struct page *page, struct list_head *list)
>  	return ret;
>  }
>  
> -int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
> +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
>  {
>  	int ret = 0;
>  
> @@ -7263,7 +7263,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>  		*hugetlb = true;
>  		if (HPageFreed(page))
>  			ret = 0;
> -		else if (HPageMigratable(page))
> +		else if (HPageMigratable(page) || unpoison)
>  			ret = get_page_unless_zero(page);
>  		else
>  			ret = -EBUSY;
> @@ -7272,12 +7272,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>  	return ret;
>  }
>  
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				bool *migratable_cleared)
>  {
>  	int ret;
>  
>  	spin_lock_irq(&hugetlb_lock);
> -	ret = __get_huge_page_for_hwpoison(pfn, flags);
> +	ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
>  	spin_unlock_irq(&hugetlb_lock);
>  	return ret;
>  }
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 145bb561ddb3..d4fef56c0438 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, false);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, true);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
>   *   -EBUSY        - the hugepage is busy (try to retry)
>   *   -EHWPOISON    - the hugepage is already hwpoisoned
>   */
> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				 bool *migratable_cleared)
>  {
>  	struct page *page = pfn_to_page(pfn);
>  	struct page *head = compound_head(page);
> @@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>  		goto out;
>  	}
>  
> +	/*
> +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
> +	 * from being migrated by memory hotremove.
> +	 */
> +	if (count_increased) {
> +		*migratable_cleared = true;
> +		ClearHPageMigratable(head);
> +	}
> +
>  	return ret;
>  out:
>  	if (count_increased)
> @@ -1834,10 +1844,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  	struct page *p = pfn_to_page(pfn);
>  	struct page *head;
>  	unsigned long page_flags;
> +	bool migratable_cleared = false;
>  
>  	*hugetlb = 1;
>  retry:
> -	res = get_huge_page_for_hwpoison(pfn, flags);
> +	res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
>  	if (res == 2) { /* fallback to normal page handling */
>  		*hugetlb = 0;
>  		return 0;
> @@ -1862,6 +1873,8 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  
>  	if (hwpoison_filter(p)) {
>  		hugetlb_clear_page_hwpoison(head);
> +		if (migratable_cleared)
> +			SetHPageMigratable(head);
>  		unlock_page(head);
>  		if (res == 1)
>  			put_page(head);
> -- 
> 2.25.1
> 
>
Miaohe Lin Oct. 15, 2022, 1:58 a.m. UTC | #2
On 2022/10/7 9:07, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> HWPoisoned page is not supposed to be accessed once marked, but currently
> such accesses can happen during memory hotremove because do_migrate_range()
> can be called before dissolve_free_huge_pages() is called.
> 
> Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
> migrated.  This should be done in hugetlb_lock to avoid race against
> isolate_hugetlb().
> 
> get_hwpoison_huge_page() needs to have a flag to show it's called from
> unpoison to take refcount of hwpoisoned hugepages, so add it.
> 
> Reported-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Sorry for the late response; I had a busy week. :) And thanks for your work, Naoya.

> ---
> ChangeLog v3 -> v6:
> - introduce migratable_cleared to remember that HPageMigratable is
>   cleared in error handling.  It's needed to cancel when an error event
>   is filtered by hwpoison_filter(). (Thanks to Miaohe)
> 
> ChangeLog v2 -> v3
> - move to the approach of clearing HPageMigratable instead of shifting
>   dissolve_free_huge_pages.
> ---
>  include/linux/hugetlb.h | 10 ++++++----
>  include/linux/mm.h      |  6 ++++--
>  mm/hugetlb.c            |  9 +++++----
>  mm/memory-failure.c     | 21 +++++++++++++++++----
>  4 files changed, 32 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 32d45e96a894..19b99ff7fea0 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -183,8 +183,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
>  long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
>  						long freed);
>  int isolate_hugetlb(struct page *page, struct list_head *list);
> -int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
> +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
> +int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				bool *migratable_cleared);
>  void putback_active_hugepage(struct page *page);
>  void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
>  void free_huge_page(struct page *page);
> @@ -391,12 +392,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list)
>  	return -EBUSY;
>  }
>  
> -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
> +static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
>  {
>  	return 0;
>  }
>  
> -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared)
>  {
>  	return 0;
>  }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 8bbcccbc5565..3264bf993ad8 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3277,9 +3277,11 @@ extern void shake_page(struct page *p);
>  extern atomic_long_t num_poisoned_pages __read_mostly;
>  extern int soft_offline_page(unsigned long pfn, int flags);
>  #ifdef CONFIG_MEMORY_FAILURE
> -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
> +extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared);
>  #else
> -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +					bool *migratable_cleared)
>  {
>  	return 0;
>  }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 63fe47a0240a..0e482dfaf92e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7253,7 +7253,7 @@ int isolate_hugetlb(struct page *page, struct list_head *list)
>  	return ret;
>  }
>  
> -int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
> +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
>  {
>  	int ret = 0;
>  
> @@ -7263,7 +7263,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>  		*hugetlb = true;
>  		if (HPageFreed(page))
>  			ret = 0;
> -		else if (HPageMigratable(page))
> +		else if (HPageMigratable(page) || unpoison)
>  			ret = get_page_unless_zero(page);
>  		else
>  			ret = -EBUSY;
> @@ -7272,12 +7272,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>  	return ret;
>  }
>  
> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				bool *migratable_cleared)
>  {
>  	int ret;
>  
>  	spin_lock_irq(&hugetlb_lock);
> -	ret = __get_huge_page_for_hwpoison(pfn, flags);
> +	ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
>  	spin_unlock_irq(&hugetlb_lock);
>  	return ret;
>  }
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 145bb561ddb3..d4fef56c0438 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, false);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, true);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
>   *   -EBUSY        - the hugepage is busy (try to retry)
>   *   -EHWPOISON    - the hugepage is already hwpoisoned
>   */
> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> +				 bool *migratable_cleared)
>  {
>  	struct page *page = pfn_to_page(pfn);
>  	struct page *head = compound_head(page);
> @@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>  		goto out;
>  	}
>  
> +	/*
> +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
> +	 * from being migrated by memory hotremove.
> +	 */
> +	if (count_increased) {
> +		*migratable_cleared = true;
> +		ClearHPageMigratable(head);

I think I might be nitpicking... But it seems ClearHPageMigratable is not enough here.
  1. In MF_COUNT_INCREASED case, we don't know whether HPageMigratable is set.
  2. Even if HPageMigratable is set, there might be a race window before we clear HPageMigratable?
So "*migratable_cleared = TestClearHPageMigratable" might be better? But I might be wrong.

With above fixed (if it's really a problem), this patch looks good to me.

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thanks,
Miaohe Lin
HORIGUCHI NAOYA(堀口 直也) Oct. 17, 2022, 7:24 a.m. UTC | #3
On Sat, Oct 15, 2022 at 09:58:09AM +0800, Miaohe Lin wrote:
> On 2022/10/7 9:07, Naoya Horiguchi wrote:
...
> > @@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
> >   *   -EBUSY        - the hugepage is busy (try to retry)
> >   *   -EHWPOISON    - the hugepage is already hwpoisoned
> >   */
> > -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> > +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> > +				 bool *migratable_cleared)
> >  {
> >  	struct page *page = pfn_to_page(pfn);
> >  	struct page *head = compound_head(page);
> > @@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> >  		goto out;
> >  	}
> >  
> > +	/*
> > +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
> > +	 * from being migrated by memory hotremove.
> > +	 */
> > +	if (count_increased) {
> > +		*migratable_cleared = true;
> > +		ClearHPageMigratable(head);
> 
> I think I might be nitpicking... But it seems ClearHPageMigratable is not enough here.
>   1. In MF_COUNT_INCREASED case, we don't know whether HPageMigratable is set.
>   2. Even if HPageMigratable is set, there might be a race window before we clear HPageMigratable?

Maybe this race is what I mentioned in
https://lore.kernel.org/linux-mm/20220928012647.GA597297@u2004.lan/
(the second scenario).  My stance on this case is that the hugepage is not
hwpoisoned even if it has some hardware issue (not visible to the kernel),
so the hwpoison handler may not be able to cope with the race.
I guess that this could be handled by applying the memcpy_mcsafe() mechanism
to page migration, but I'm not sure of the feasibility.

> So "*migratable_cleared = TestClearHPageMigratable" might be better? But I might be wrong.

Thanks, this seems to work for 1 (I need to test it to be sure), so I'll do
this in the next post.

> 
> With above fixed (if it's really a problem), this patch looks good to me.
> 
> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thank you very much.

- Naoya Horiguchi
Miaohe Lin Oct. 17, 2022, 1:29 p.m. UTC | #4
On 2022/10/17 15:24, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Sat, Oct 15, 2022 at 09:58:09AM +0800, Miaohe Lin wrote:
>> On 2022/10/7 9:07, Naoya Horiguchi wrote:
> ...
>>> @@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
>>>   *   -EBUSY        - the hugepage is busy (try to retry)
>>>   *   -EHWPOISON    - the hugepage is already hwpoisoned
>>>   */
>>> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>>> +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>>> +				 bool *migratable_cleared)
>>>  {
>>>  	struct page *page = pfn_to_page(pfn);
>>>  	struct page *head = compound_head(page);
>>> @@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>>>  		goto out;
>>>  	}
>>>  
>>> +	/*
>>> +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
>>> +	 * from being migrated by memory hotremove.
>>> +	 */
>>> +	if (count_increased) {
>>> +		*migratable_cleared = true;
>>> +		ClearHPageMigratable(head);
>>
>> I think I might be nitpicking... But it seems ClearHPageMigratable is not enough here.
>>   1. In MF_COUNT_INCREASED case, we don't know whether HPageMigratable is set.
>>   2. Even if HPageMigratable is set, there might be a race window before we clear HPageMigratable?
> 
> Maybe this race is what I mentioned in
> https://lore.kernel.org/linux-mm/20220928012647.GA597297@u2004.lan/
> (the second scenario).  My stance to this case is that the hugepage is not
> hwpoisoned even if some hardware (not visible to kernel) issue is in it,
> so hwpoison handler can/may not cope with the race.
> I guess that this could be handled by applying memcpy_mcsafe() mechanism
> to page migration, but I'm not sure of the feasibility.

Thanks Naoya. memcpy_mcsafe() might be a good way to handle hwpoison during the memory copy in
the page migration path, and [1] is doing a similar thing. If this mechanism is applicable, then
we could handle more memory failure scenarios. ;)

[1] https://lore.kernel.org/linux-mm/20221010160142.1087120-1-jiaqiyan@google.com/

> 
>> So "*migratable_cleared = TestClearHPageMigratable" might be better? But I might be wrong.
> 
> Thanks, this seems work for 1 (I need testing to check it), so I'll do this
> in the next post.

Many thanks for your work.

Thanks,
Miaohe Lin

> 
>>
>> With above fixed (if it's really a problem), this patch looks good to me.
>>
>> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
> 
> Thank you very much.
> 
> - Naoya Horiguchi
>
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 32d45e96a894..19b99ff7fea0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -183,8 +183,9 @@  bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
 int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				bool *migratable_cleared);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -391,12 +392,13 @@  static inline int isolate_hugetlb(struct page *page, struct list_head *list)
 	return -EBUSY;
 }
 
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	return 0;
 }
 
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared)
 {
 	return 0;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc5565..3264bf993ad8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3277,9 +3277,11 @@  extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared);
 #else
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared)
 {
 	return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 63fe47a0240a..0e482dfaf92e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7253,7 +7253,7 @@  int isolate_hugetlb(struct page *page, struct list_head *list)
 	return ret;
 }
 
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	int ret = 0;
 
@@ -7263,7 +7263,7 @@  int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 		*hugetlb = true;
 		if (HPageFreed(page))
 			ret = 0;
-		else if (HPageMigratable(page))
+		else if (HPageMigratable(page) || unpoison)
 			ret = get_page_unless_zero(page);
 		else
 			ret = -EBUSY;
@@ -7272,12 +7272,13 @@  int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 	return ret;
 }
 
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				bool *migratable_cleared)
 {
 	int ret;
 
 	spin_lock_irq(&hugetlb_lock);
-	ret = __get_huge_page_for_hwpoison(pfn, flags);
+	ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
 	spin_unlock_irq(&hugetlb_lock);
 	return ret;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 145bb561ddb3..d4fef56c0438 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1244,7 +1244,7 @@  static int __get_hwpoison_page(struct page *page, unsigned long flags)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, false);
 	if (hugetlb)
 		return ret;
 
@@ -1334,7 +1334,7 @@  static int __get_unpoison_page(struct page *page)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, true);
 	if (hugetlb)
 		return ret;
 
@@ -1785,7 +1785,8 @@  void hugetlb_clear_page_hwpoison(struct page *hpage)
  *   -EBUSY        - the hugepage is busy (try to retry)
  *   -EHWPOISON    - the hugepage is already hwpoisoned
  */
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				 bool *migratable_cleared)
 {
 	struct page *page = pfn_to_page(pfn);
 	struct page *head = compound_head(page);
@@ -1815,6 +1816,15 @@  int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 		goto out;
 	}
 
+	/*
+	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
+	 * from being migrated by memory hotremove.
+	 */
+	if (count_increased) {
+		*migratable_cleared = true;
+		ClearHPageMigratable(head);
+	}
+
 	return ret;
 out:
 	if (count_increased)
@@ -1834,10 +1844,11 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 	struct page *p = pfn_to_page(pfn);
 	struct page *head;
 	unsigned long page_flags;
+	bool migratable_cleared = false;
 
 	*hugetlb = 1;
 retry:
-	res = get_huge_page_for_hwpoison(pfn, flags);
+	res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
 	if (res == 2) { /* fallback to normal page handling */
 		*hugetlb = 0;
 		return 0;
@@ -1862,6 +1873,8 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 
 	if (hwpoison_filter(p)) {
 		hugetlb_clear_page_hwpoison(head);
+		if (migratable_cleared)
+			SetHPageMigratable(head);
 		unlock_page(head);
 		if (res == 1)
 			put_page(head);