diff mbox series

[v3,1/4] mm,hwpoison,hugetlb,memory_hotplug: hotremove memory section with hwpoisoned hugepage

Message ID 20220921091359.25889-2-naoya.horiguchi@linux.dev (mailing list archive)
State New
Headers show
Series mm, hwpoison: improve handling workload related to hugetlb and memory_hotplug | expand

Commit Message

Naoya Horiguchi Sept. 21, 2022, 9:13 a.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

HWPoisoned page is not supposed to be accessed once marked, but currently
such accesses can happen during memory hotremove because do_migrate_range()
can be called before dissolve_free_huge_pages() is called.

Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
migrated.  This should be done in hugetlb_lock to avoid race against
isolate_hugetlb().

get_hwpoison_huge_page() needs to have a flag to show it's called from
unpoison to take refcount of hwpoisoned hugepages, so add it.

Reported-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
ChangeLog v2 -> v3
- move to the approach of clearing HPageMigratable instead of shifting
  dissolve_free_huge_pages.
---
 include/linux/hugetlb.h |  4 ++--
 mm/hugetlb.c            |  4 ++--
 mm/memory-failure.c     | 12 ++++++++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

Comments

Miaohe Lin Sept. 24, 2022, 11:43 a.m. UTC | #1
On 2022/9/21 17:13, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> HWPoisoned page is not supposed to be accessed once marked, but currently
> such accesses can happen during memory hotremove because do_migrate_range()
> can be called before dissolve_free_huge_pages() is called.
> 
> Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
> migrated.  This should be done in hugetlb_lock to avoid race against
> isolate_hugetlb().
> 
> get_hwpoison_huge_page() needs to have a flag to show it's called from
> unpoison to take refcount of hwpoisoned hugepages, so add it.
> 
> Reported-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Thanks for your work, Naoya. Maybe something to improve below.

> ---
> ChangeLog v2 -> v3
> - move to the approach of clearing HPageMigratable instead of shifting
>   dissolve_free_huge_pages.
> ---
>  include/linux/hugetlb.h |  4 ++--
>  mm/hugetlb.c            |  4 ++--
>  mm/memory-failure.c     | 12 ++++++++++--
>  3 files changed, 14 insertions(+), 6 deletions(-)
> 

<snip>

> @@ -7267,7 +7267,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>  		*hugetlb = true;
>  		if (HPageFreed(page))
>  			ret = 0;
> -		else if (HPageMigratable(page))
> +		else if (HPageMigratable(page) || unpoison)

Is unpoison_memory() expected to restore the HPageMigratable flag as well ?

>  			ret = get_page_unless_zero(page);
>  		else
>  			ret = -EBUSY;
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 145bb561ddb3..5942e1c0407e 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, false);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
>  	int ret = 0;
>  	bool hugetlb = false;
>  
> -	ret = get_hwpoison_huge_page(head, &hugetlb);
> +	ret = get_hwpoison_huge_page(head, &hugetlb, true);
>  	if (hugetlb)
>  		return ret;
>  
> @@ -1815,6 +1815,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>  		goto out;
>  	}
>  
> +	/*
> +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
> +	 * from being migrated by memory hotremove.
> +	 */
> +	if (count_increased)
> +		ClearHPageMigratable(head);

I believe this can prevent hwpoisoned hugepages from being migrated though there still be some windows.

> +
>  	return ret;
>  out:
>  	if (count_increased)
> @@ -1862,6 +1869,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  
>  	if (hwpoison_filter(p)) {
>  		hugetlb_clear_page_hwpoison(head);
> +		SetHPageMigratable(head);

Would we set HPageMigratable flag for free hugetlb pages here? IIUC, they're not expected to have this flag set.

Thanks,
Miaohe Lin

>  		unlock_page(head);
>  		if (res == 1)
>  			put_page(head);
>
Naoya Horiguchi Sept. 28, 2022, 1:26 a.m. UTC | #2
On Sat, Sep 24, 2022 at 07:43:16PM +0800, Miaohe Lin wrote:
> On 2022/9/21 17:13, Naoya Horiguchi wrote:
> > From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > 
> > HWPoisoned page is not supposed to be accessed once marked, but currently
> > such accesses can happen during memory hotremove because do_migrate_range()
> > can be called before dissolve_free_huge_pages() is called.
> > 
> > Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
> > migrated.  This should be done in hugetlb_lock to avoid race against
> > isolate_hugetlb().
> > 
> > get_hwpoison_huge_page() needs to have a flag to show it's called from
> > unpoison to take refcount of hwpoisoned hugepages, so add it.
> > 
> > Reported-by: Miaohe Lin <linmiaohe@huawei.com>
> > Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> Thanks for your work, Naoya. Maybe something to improve below.
> 
> > ---
> > ChangeLog v2 -> v3
> > - move to the approach of clearing HPageMigratable instead of shifting
> >   dissolve_free_huge_pages.
> > ---
> >  include/linux/hugetlb.h |  4 ++--
> >  mm/hugetlb.c            |  4 ++--
> >  mm/memory-failure.c     | 12 ++++++++++--
> >  3 files changed, 14 insertions(+), 6 deletions(-)
> > 
> 
> <snip>
> 
> > @@ -7267,7 +7267,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
> >  		*hugetlb = true;
> >  		if (HPageFreed(page))
> >  			ret = 0;
> > -		else if (HPageMigratable(page))
> > +		else if (HPageMigratable(page) || unpoison)
> 
> Is unpoison_memory() expected to restore the HPageMigratable flag as well ?

No it isn't. When unpoison_memory() unpoisons a hugepage, the hugepage
is sent back to free hugepage pool, so I think that there's no need to
restore HPageMigratable for it.

> 
> >  			ret = get_page_unless_zero(page);
> >  		else
> >  			ret = -EBUSY;
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index 145bb561ddb3..5942e1c0407e 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
> >  	int ret = 0;
> >  	bool hugetlb = false;
> >  
> > -	ret = get_hwpoison_huge_page(head, &hugetlb);
> > +	ret = get_hwpoison_huge_page(head, &hugetlb, false);
> >  	if (hugetlb)
> >  		return ret;
> >  
> > @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
> >  	int ret = 0;
> >  	bool hugetlb = false;
> >  
> > -	ret = get_hwpoison_huge_page(head, &hugetlb);
> > +	ret = get_hwpoison_huge_page(head, &hugetlb, true);
> >  	if (hugetlb)
> >  		return ret;
> >  
> > @@ -1815,6 +1815,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> >  		goto out;
> >  	}
> >  
> > +	/*
> > +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
> > +	 * from being migrated by memory hotremove.
> > +	 */
> > +	if (count_increased)
> > +		ClearHPageMigratable(head);
> 
> I believe this can prevent hwpoisoned hugepages from being migrated though there still be some windows.

I'm not sure of "still racy" part, so could you elaborate it?
Main scenario this patch tries to handle is like below:

  CPU 0                                   CPU 1                                   
  get_huge_page_for_hwpoison                                                      
    // take hugetlb_lock                                                          
    __get_huge_page_for_hwpoison                                                  
                                          scan_movable_pages                      
                                            if HPageMigratable                    
                                              goto found                          
                                          do_migrate_range                        
      if HPageMigratable                                                          
        get_page_unless_zero                                                      
      hugetlb_set_page_hwpoison                                                   
      ClearHPageMigratable                                                        
    // release hugetlb_lock                                                       
                                            isolate_hugetlb                       
                                              // take hugetlb_lock                
                                              if !HPageMigratable                 
                                                // fails to isolate the hwpoisoned hugetlb.

Maybe the following could be possible.

  CPU 0                                   CPU 1
                                          scan_movable_pages
                                            if HPageMigratable
                                              goto found
                                          do_migrate_range
                                            isolate_hugetlb
                                              // the hugetlb is isolated,
                                              // but it's not hwpoisoned yet.
  get_huge_page_for_hwpoison
    // take hugetlb_lock
    __get_huge_page_for_hwpoison
      if HPageMigratable
        get_page_unless_zero
      hugetlb_set_page_hwpoison
      ClearHPageMigratable
    // release hugetlb_lock

In this case, the hugepage is maybe broken but not marked as hwpoison yet,
so it's not detectable.

> 
> > +
> >  	return ret;
> >  out:
> >  	if (count_increased)
> > @@ -1862,6 +1869,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
> >  
> >  	if (hwpoison_filter(p)) {
> >  		hugetlb_clear_page_hwpoison(head);
> > +		SetHPageMigratable(head);
> 
> Would we set HPageMigratable flag for free hugetlb pages here? IIUC, they're not expected to have this flag set.

Thank you, you're right.  This should be done in "if (res == 1)" block.
(hwpoison_filter often bothers me ...)

> 
> Thanks,
> Miaohe Lin

Thank you very much!
- Naoya Horiguchi
Miaohe Lin Sept. 28, 2022, 9:32 a.m. UTC | #3
On 2022/9/28 9:26, Naoya Horiguchi wrote:
> On Sat, Sep 24, 2022 at 07:43:16PM +0800, Miaohe Lin wrote:
>> On 2022/9/21 17:13, Naoya Horiguchi wrote:
>>> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
>>>
>>> HWPoisoned page is not supposed to be accessed once marked, but currently
>>> such accesses can happen during memory hotremove because do_migrate_range()
>>> can be called before dissolve_free_huge_pages() is called.
>>>
>>> Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
>>> migrated.  This should be done in hugetlb_lock to avoid race against
>>> isolate_hugetlb().
>>>
>>> get_hwpoison_huge_page() needs to have a flag to show it's called from
>>> unpoison to take refcount of hwpoisoned hugepages, so add it.
>>>
>>> Reported-by: Miaohe Lin <linmiaohe@huawei.com>
>>> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
>>
>> Thanks for your work, Naoya. Maybe something to improve below.
>>
>>> ---
>>> ChangeLog v2 -> v3
>>> - move to the approach of clearing HPageMigratable instead of shifting
>>>   dissolve_free_huge_pages.
>>> ---
>>>  include/linux/hugetlb.h |  4 ++--
>>>  mm/hugetlb.c            |  4 ++--
>>>  mm/memory-failure.c     | 12 ++++++++++--
>>>  3 files changed, 14 insertions(+), 6 deletions(-)
>>>
>>
>> <snip>
>>
>>> @@ -7267,7 +7267,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
>>>  		*hugetlb = true;
>>>  		if (HPageFreed(page))
>>>  			ret = 0;
>>> -		else if (HPageMigratable(page))
>>> +		else if (HPageMigratable(page) || unpoison)
>>
>> Is unpoison_memory() expected to restore the HPageMigratable flag as well ?
> 
> No it isn't. When unpoison_memory() unpoisons a hugepage, the hugepage
> is sent back to free hugepage pool, so I think that there's no need to
> restore HPageMigratable for it.

I tend to agree with you.

> 
>>
>>>  			ret = get_page_unless_zero(page);
>>>  		else
>>>  			ret = -EBUSY;
>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>> index 145bb561ddb3..5942e1c0407e 100644
>>> --- a/mm/memory-failure.c
>>> +++ b/mm/memory-failure.c
>>> @@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
>>>  	int ret = 0;
>>>  	bool hugetlb = false;
>>>  
>>> -	ret = get_hwpoison_huge_page(head, &hugetlb);
>>> +	ret = get_hwpoison_huge_page(head, &hugetlb, false);
>>>  	if (hugetlb)
>>>  		return ret;
>>>  
>>> @@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
>>>  	int ret = 0;
>>>  	bool hugetlb = false;
>>>  
>>> -	ret = get_hwpoison_huge_page(head, &hugetlb);
>>> +	ret = get_hwpoison_huge_page(head, &hugetlb, true);
>>>  	if (hugetlb)
>>>  		return ret;
>>>  
>>> @@ -1815,6 +1815,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>>>  		goto out;
>>>  	}
>>>  
>>> +	/*
>>> +	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
>>> +	 * from being migrated by memory hotremove.
>>> +	 */
>>> +	if (count_increased)
>>> +		ClearHPageMigratable(head);
>>
>> I believe this can prevent hwpoisoned hugepages from being migrated though there still be some windows.
> 
> I'm not sure of "still racy" part, so could you elaborate it?
> Main scenario this patch tries to handle is like below:
> 
>   CPU 0                                   CPU 1                                   
>   get_huge_page_for_hwpoison                                                      
>     // take hugetlb_lock                                                          
>     __get_huge_page_for_hwpoison                                                  
>                                           scan_movable_pages                      
>                                             if HPageMigratable                    
>                                               goto found                          
>                                           do_migrate_range                        
>       if HPageMigratable                                                          
>         get_page_unless_zero                                                      
>       hugetlb_set_page_hwpoison                                                   
>       ClearHPageMigratable                                                        
>     // release hugetlb_lock                                                       
>                                             isolate_hugetlb                       
>                                               // take hugetlb_lock                
>                                               if !HPageMigratable                 
>                                                 // fails to isolate the hwpoisoned hugetlb.
> 
> Maybe the following could be possible.
> 
>   CPU 0                                   CPU 1
>                                           scan_movable_pages
>                                             if HPageMigratable
>                                               goto found
>                                           do_migrate_range
>                                             isolate_hugetlb
>                                               // the hugetlb is isolated,
>                                               // but it's not hwpoisoned yet.
>   get_huge_page_for_hwpoison
>     // take hugetlb_lock
>     __get_huge_page_for_hwpoison
>       if HPageMigratable
>         get_page_unless_zero
>       hugetlb_set_page_hwpoison
>       ClearHPageMigratable
>     // release hugetlb_lock

Yes, this is what I mean. For already isolated hugetlb pages, HPageMigratable flags can't help much.

> 
> In this case, the hugepage is maybe broken but not marked as hwpoison yet,
> so it's not detectable.
> 
>>
>>> +
>>>  	return ret;
>>>  out:
>>>  	if (count_increased)
>>> @@ -1862,6 +1869,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>>>  
>>>  	if (hwpoison_filter(p)) {
>>>  		hugetlb_clear_page_hwpoison(head);
>>> +		SetHPageMigratable(head);
>>
>> Would we set HPageMigratable flag for free hugetlb pages here? IIUC, they're not expected to have this flag set.
> 
> Thank you, you're right.  This should be done in "if (res == 1)" block.

If res == 1, it means hugetlb page refcnt is incremented. But it seems this does not necessarily mean
HPageMigratable is cleared by __get_huge_page_for_hwpoison() if the hugetlb page is already isolated.
If so, we might set HPageMigratable flag back for already isolated hugetlb pages?

> (hwpoison_filter often bothers me ...)

Agree, hwpoison_filter() makes things more complicated. ;)

Thanks,
Miaohe Lin

> 
>>
>> Thanks,
>> Miaohe Lin
> 
> Thank you very much!
> - Naoya Horiguchi
> .
>
HORIGUCHI NAOYA(堀口 直也) Oct. 7, 2022, 12:45 a.m. UTC | #4
On Wed, Sep 28, 2022 at 05:32:12PM +0800, Miaohe Lin wrote:
> On 2022/9/28 9:26, Naoya Horiguchi wrote:
> >>> @@ -1815,6 +1815,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
...
> >>> @@ -1862,6 +1869,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
> >>>  
> >>>  	if (hwpoison_filter(p)) {
> >>>  		hugetlb_clear_page_hwpoison(head);
> >>> +		SetHPageMigratable(head);
> >>
> >> Would we set HPageMigratable flag for free hugetlb pages here? IIUC, they're not expected to have this flag set.
> > 
> > Thank you, you're right.  This should be done in "if (res == 1)" block.
> 
> If res == 1, it means hugetlb page refcnt is incremented. But it seems this does not necessarily mean
> HPageMigratable is cleared by __get_huge_page_for_hwpoison() if the hugetlb page is already isolated.
> If so, we might set HPageMigratable flag back for already isolated hugetlb pages?

# sorry for my late reply, I was busy with personal matters these days...

Yes, that could happen (and also in the case where MF_COUNT_INCREASED is
set).  We need store whether HPageMigratable flag is cleared or not in
__get_huge_page_for_hwpoison().  I'll add a parameter to
__get_huge_page_for_hwpoison() to return the flag change to the caller.
But I also think that there're a few internal states during error handling,
so it might be good to add some structure like "struct hwpoison_control"
to save such internal states over related functions (not in this series).

Thanks,
Naoya Horiguchi
Miaohe Lin Oct. 8, 2022, 2:33 a.m. UTC | #5
On 2022/10/7 8:45, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Wed, Sep 28, 2022 at 05:32:12PM +0800, Miaohe Lin wrote:
>> On 2022/9/28 9:26, Naoya Horiguchi wrote:
>>>>> @@ -1815,6 +1815,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
> ...
>>>>> @@ -1862,6 +1869,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>>>>>  
>>>>>  	if (hwpoison_filter(p)) {
>>>>>  		hugetlb_clear_page_hwpoison(head);
>>>>> +		SetHPageMigratable(head);
>>>>
>>>> Would we set HPageMigratable flag for free hugetlb pages here? IIUC, they're not expected to have this flag set.
>>>
>>> Thank you, you're right.  This should be done in "if (res == 1)" block.
>>
>> If res == 1, it means hugetlb page refcnt is incremented. But it seems this does not necessarily mean
>> HPageMigratable is cleared by __get_huge_page_for_hwpoison() if the hugetlb page is already isolated.
>> If so, we might set HPageMigratable flag back for already isolated hugetlb pages?
> 
> # sorry for my late reply, I was busy with personal matters these days...

That's all right. I was back from my vacation too. Thanks for your work. :)

> 
> Yes, that could happen (and also in the case where MF_COUNT_INCREASED is
> set).  We need store whether HPageMigratable flag is cleared or not in
> __get_huge_page_for_hwpoison().  I'll add a parameter to
> __get_huge_page_for_hwpoison() to return the flag change to the caller.
> But I also think that there're a few internal states during error handling,
> so it might be good to add some structure like "struct hwpoison_control"
> to save such internal states over related functions (not in this series).

That sounds like a good idea.

Thanks,
Miaohe Lin

> 
> Thanks,
> Naoya Horiguchi
>
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cfe15b32e2d4..18229402c6d7 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -181,7 +181,7 @@  bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
 int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
 int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
@@ -425,7 +425,7 @@  static inline int isolate_hugetlb(struct page *page, struct list_head *list)
 	return -EBUSY;
 }
 
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8bcaf66defc5..d3b83c570b56 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7257,7 +7257,7 @@  int isolate_hugetlb(struct page *page, struct list_head *list)
 	return ret;
 }
 
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	int ret = 0;
 
@@ -7267,7 +7267,7 @@  int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 		*hugetlb = true;
 		if (HPageFreed(page))
 			ret = 0;
-		else if (HPageMigratable(page))
+		else if (HPageMigratable(page) || unpoison)
 			ret = get_page_unless_zero(page);
 		else
 			ret = -EBUSY;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 145bb561ddb3..5942e1c0407e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1244,7 +1244,7 @@  static int __get_hwpoison_page(struct page *page, unsigned long flags)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, false);
 	if (hugetlb)
 		return ret;
 
@@ -1334,7 +1334,7 @@  static int __get_unpoison_page(struct page *page)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, true);
 	if (hugetlb)
 		return ret;
 
@@ -1815,6 +1815,13 @@  int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 		goto out;
 	}
 
+	/*
+	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
+	 * from being migrated by memory hotremove.
+	 */
+	if (count_increased)
+		ClearHPageMigratable(head);
+
 	return ret;
 out:
 	if (count_increased)
@@ -1862,6 +1869,7 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 
 	if (hwpoison_filter(p)) {
 		hugetlb_clear_page_hwpoison(head);
+		SetHPageMigratable(head);
 		unlock_page(head);
 		if (res == 1)
 			put_page(head);