diff mbox series

[v1,2/5] mm,hwpoison: set PG_hwpoison for busy hugetlb pages

Message ID 20220602050631.771414-3-naoya.horiguchi@linux.dev (mailing list archive)
State New
Headers show
Series mm, hwpoison: enable 1GB hugepage support | expand

Commit Message

Naoya Horiguchi June 2, 2022, 5:06 a.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

If memory_failure() fails to grab page refcount on a hugetlb page
because it's busy, it returns without setting PG_hwpoison on it.
This not only loses a chance of error containment, but breaks the rule
that action_result() should be called only when memory_failure() do
any of handling work (even if that's just setting PG_hwpoison).
This inconsistency could harm code maintainability.

So set PG_hwpoison and call hugetlb_set_page_hwpoison() for such a case.

Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
 include/linux/mm.h  | 1 +
 mm/memory-failure.c | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

Comments

Miaohe Lin June 7, 2022, 12:50 p.m. UTC | #1
On 2022/6/2 13:06, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> If memory_failure() fails to grab page refcount on a hugetlb page
> because it's busy, it returns without setting PG_hwpoison on it.
> This not only loses a chance of error containment, but breaks the rule
> that action_result() should be called only when memory_failure() do
> any of handling work (even if that's just setting PG_hwpoison).
> This inconsistency could harm code maintainability.

Yes, this patch will make the code more maintainable. But as discussed previously,
this page might be under the migration, this patch can't save more.

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thanks!

> 
> So set PG_hwpoison and call hugetlb_set_page_hwpoison() for such a case.
> 
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> ---
>  include/linux/mm.h  | 1 +
>  mm/memory-failure.c | 8 ++++----
>  2 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index d446e834a3e5..04de0c3e4f9f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3187,6 +3187,7 @@ enum mf_flags {
>  	MF_MUST_KILL = 1 << 2,
>  	MF_SOFT_OFFLINE = 1 << 3,
>  	MF_UNPOISON = 1 << 4,
> +	MF_NO_RETRY = 1 << 5,
>  };
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue(unsigned long pfn, int flags);
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 056dbb2050f8..fe6a7961dc66 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1526,7 +1526,8 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>  			count_increased = true;
>  	} else {
>  		ret = -EBUSY;
> -		goto out;
> +		if (!(flags & MF_NO_RETRY))
> +			goto out;
>  	}
>  
>  	if (TestSetPageHWPoison(head)) {
> @@ -1556,7 +1557,6 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  	struct page *p = pfn_to_page(pfn);
>  	struct page *head;
>  	unsigned long page_flags;
> -	bool retry = true;
>  
>  	*hugetlb = 1;
>  retry:
> @@ -1572,8 +1572,8 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  		}
>  		return res;
>  	} else if (res == -EBUSY) {
> -		if (retry) {
> -			retry = false;
> +		if (!(flags & MF_NO_RETRY)) {
> +			flags |= MF_NO_RETRY;
>  			goto retry;
>  		}
>  		action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d446e834a3e5..04de0c3e4f9f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3187,6 +3187,7 @@  enum mf_flags {
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 	MF_UNPOISON = 1 << 4,
+	MF_NO_RETRY = 1 << 5,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 056dbb2050f8..fe6a7961dc66 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1526,7 +1526,8 @@  int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 			count_increased = true;
 	} else {
 		ret = -EBUSY;
-		goto out;
+		if (!(flags & MF_NO_RETRY))
+			goto out;
 	}
 
 	if (TestSetPageHWPoison(head)) {
@@ -1556,7 +1557,6 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 	struct page *p = pfn_to_page(pfn);
 	struct page *head;
 	unsigned long page_flags;
-	bool retry = true;
 
 	*hugetlb = 1;
 retry:
@@ -1572,8 +1572,8 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 		}
 		return res;
 	} else if (res == -EBUSY) {
-		if (retry) {
-			retry = false;
+		if (!(flags & MF_NO_RETRY)) {
+			flags |= MF_NO_RETRY;
 			goto retry;
 		}
 		action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);