diff mbox series

[v2,6/9] mm, hwpoison: set PG_hwpoison for busy hugetlb pages

Message ID 20220623235153.2623702-7-naoya.horiguchi@linux.dev (mailing list archive)
State New
Headers show
Series mm, hwpoison: enable 1GB hugepage support (v2) | expand

Commit Message

Naoya Horiguchi June 23, 2022, 11:51 p.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

If memory_failure() fails to grab page refcount on a hugetlb page
because it's busy, it returns without setting PG_hwpoison on it.
This not only loses a chance of error containment, but breaks the rule
that action_result() should be called only when memory_failure() do
any of handling work (even if that's just setting PG_hwpoison).
This inconsistency could harm code maintainability.

So set PG_hwpoison and call hugetlb_set_page_hwpoison() for such a case.

Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
 include/linux/mm.h  | 1 +
 mm/memory-failure.c | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

Comments

Miaohe Lin June 27, 2022, 8:39 a.m. UTC | #1
On 2022/6/24 7:51, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> If memory_failure() fails to grab page refcount on a hugetlb page
> because it's busy, it returns without setting PG_hwpoison on it.
> This not only loses a chance of error containment, but breaks the rule
> that action_result() should be called only when memory_failure() do
> any of handling work (even if that's just setting PG_hwpoison).
> This inconsistency could harm code maintainability.
> 
> So set PG_hwpoison and call hugetlb_set_page_hwpoison() for such a case.
> 
> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thanks!

> ---
>  include/linux/mm.h  | 1 +
>  mm/memory-failure.c | 8 ++++----
>  2 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4346e51484ba..044dc5a2e361 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3233,6 +3233,7 @@ enum mf_flags {
>  	MF_SOFT_OFFLINE = 1 << 3,
>  	MF_UNPOISON = 1 << 4,
>  	MF_SW_SIMULATED = 1 << 5,
> +	MF_NO_RETRY = 1 << 6,
>  };
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue(unsigned long pfn, int flags);
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 6005e953d011..ce045d0d6115 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1632,7 +1632,8 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
>  			count_increased = true;
>  	} else {
>  		ret = -EBUSY;
> -		goto out;
> +		if (!(flags & MF_NO_RETRY))
> +			goto out;
>  	}
>  
>  	if (hugetlb_set_page_hwpoison(head, page)) {
> @@ -1659,7 +1660,6 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  	struct page *p = pfn_to_page(pfn);
>  	struct page *head;
>  	unsigned long page_flags;
> -	bool retry = true;
>  
>  	*hugetlb = 1;
>  retry:
> @@ -1675,8 +1675,8 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>  		}
>  		return res;
>  	} else if (res == -EBUSY) {
> -		if (retry) {
> -			retry = false;
> +		if (!(flags & MF_NO_RETRY)) {
> +			flags |= MF_NO_RETRY;
>  			goto retry;
>  		}
>  		action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4346e51484ba..044dc5a2e361 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3233,6 +3233,7 @@  enum mf_flags {
 	MF_SOFT_OFFLINE = 1 << 3,
 	MF_UNPOISON = 1 << 4,
 	MF_SW_SIMULATED = 1 << 5,
+	MF_NO_RETRY = 1 << 6,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6005e953d011..ce045d0d6115 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1632,7 +1632,8 @@  int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 			count_increased = true;
 	} else {
 		ret = -EBUSY;
-		goto out;
+		if (!(flags & MF_NO_RETRY))
+			goto out;
 	}
 
 	if (hugetlb_set_page_hwpoison(head, page)) {
@@ -1659,7 +1660,6 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 	struct page *p = pfn_to_page(pfn);
 	struct page *head;
 	unsigned long page_flags;
-	bool retry = true;
 
 	*hugetlb = 1;
 retry:
@@ -1675,8 +1675,8 @@  static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 		}
 		return res;
 	} else if (res == -EBUSY) {
-		if (retry) {
-			retry = false;
+		if (!(flags & MF_NO_RETRY)) {
+			flags |= MF_NO_RETRY;
 			goto retry;
 		}
 		action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);