diff mbox series

[v2,3/3] mm/memory-failure.c: make non-LRU movable pages unhandlable

Message ID 20220312074613.4798-4-linmiaohe@huawei.com (mailing list archive)
State New, archived
Headers show
Series A few fixup patches for memory failure | expand

Commit Message

Miaohe Lin March 12, 2022, 7:46 a.m. UTC
We can not really handle non-LRU movable pages in memory failure. Typically
they are balloon, zsmalloc, etc. Assuming we run into a base (4K) non-LRU
movable page, we could reach as far as identify_page_state(), it should not
fall into any category except me_unknown. For the non-LRU compound movable
pages, they could be taken for transhuge pages but it's unexpected to split
non-LRU movable pages using split_huge_page_to_list in memory_failure. So
we could just simply make non-LRU movable pages unhandlable to avoid these
possible nasty cases.

Suggested-by: Yang Shi <shy828301@gmail.com>
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
---
 mm/memory-failure.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

Comments

HORIGUCHI NAOYA(堀口 直也) March 13, 2022, 11:43 p.m. UTC | #1
On Sat, Mar 12, 2022 at 03:46:13PM +0800, Miaohe Lin wrote:
> We can not really handle non-LRU movable pages in memory failure. Typically
> they are balloon, zsmalloc, etc. Assuming we run into a base (4K) non-LRU
> movable page, we could reach as far as identify_page_state(), it should not
> fall into any category except me_unknown. For the non-LRU compound movable
> pages, they could be taken for transhuge pages but it's unexpected to split
> non-LRU  movable pages using split_huge_page_to_list in memory_failure. So
> we could just simply make non-LRU  movable pages unhandlable to avoid these
> possible nasty cases.
> 
> Suggested-by: Yang Shi <shy828301@gmail.com>
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>

Looks good to me.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Yang Shi March 14, 2022, 5:34 p.m. UTC | #2
On Fri, Mar 11, 2022 at 11:47 PM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> We can not really handle non-LRU movable pages in memory failure. Typically
> they are balloon, zsmalloc, etc. Assuming we run into a base (4K) non-LRU
> movable page, we could reach as far as identify_page_state(), it should not
> fall into any category except me_unknown. For the non-LRU compound movable
> pages, they could be taken for transhuge pages but it's unexpected to split
> non-LRU  movable pages using split_huge_page_to_list in memory_failure. So
> we could just simply make non-LRU  movable pages unhandlable to avoid these
> possible nasty cases.
>
> Suggested-by: Yang Shi <shy828301@gmail.com>
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>

Reviewed-by: Yang Shi <shy828301@gmail.com>

> ---
>  mm/memory-failure.c | 20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 2ff7dd2078c4..ba621c6823ed 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1177,12 +1177,18 @@ void ClearPageHWPoisonTakenOff(struct page *page)
>   * does not return true for hugetlb or device memory pages, so it's assumed
>   * to be called only in the context where we never have such pages.
>   */
> -static inline bool HWPoisonHandlable(struct page *page)
> +static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
>  {
> -       return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
> +       bool movable = false;
> +
> +       /* Soft offline could migrate non-LRU movable pages */
> +       if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
> +               movable = true;
> +
> +       return movable || PageLRU(page) || is_free_buddy_page(page);
>  }
>
> -static int __get_hwpoison_page(struct page *page)
> +static int __get_hwpoison_page(struct page *page, unsigned long flags)
>  {
>         struct page *head = compound_head(page);
>         int ret = 0;
> @@ -1197,7 +1203,7 @@ static int __get_hwpoison_page(struct page *page)
>          * for any unsupported type of page in order to reduce the risk of
>          * unexpected races caused by taking a page refcount.
>          */
> -       if (!HWPoisonHandlable(head))
> +       if (!HWPoisonHandlable(head, flags))
>                 return -EBUSY;
>
>         if (get_page_unless_zero(head)) {
> @@ -1222,7 +1228,7 @@ static int get_any_page(struct page *p, unsigned long flags)
>
>  try_again:
>         if (!count_increased) {
> -               ret = __get_hwpoison_page(p);
> +               ret = __get_hwpoison_page(p, flags);
>                 if (!ret) {
>                         if (page_count(p)) {
>                                 /* We raced with an allocation, retry. */
> @@ -1250,7 +1256,7 @@ static int get_any_page(struct page *p, unsigned long flags)
>                 }
>         }
>
> -       if (PageHuge(p) || HWPoisonHandlable(p)) {
> +       if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
>                 ret = 1;
>         } else {
>                 /*
> @@ -2308,7 +2314,7 @@ int soft_offline_page(unsigned long pfn, int flags)
>
>  retry:
>         get_online_mems();
> -       ret = get_hwpoison_page(page, flags);
> +       ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
>         put_online_mems();
>
>         if (ret > 0) {
> --
> 2.23.0
>
diff mbox series

Patch

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2ff7dd2078c4..ba621c6823ed 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1177,12 +1177,18 @@  void ClearPageHWPoisonTakenOff(struct page *page)
  * does not return true for hugetlb or device memory pages, so it's assumed
  * to be called only in the context where we never have such pages.
  */
-static inline bool HWPoisonHandlable(struct page *page)
+static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
 {
-	return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
+	bool movable = false;
+
+	/* Soft offline could migrate non-LRU movable pages */
+	if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+		movable = true;
+
+	return movable || PageLRU(page) || is_free_buddy_page(page);
 }
 
-static int __get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page, unsigned long flags)
 {
 	struct page *head = compound_head(page);
 	int ret = 0;
@@ -1197,7 +1203,7 @@  static int __get_hwpoison_page(struct page *page)
 	 * for any unsupported type of page in order to reduce the risk of
 	 * unexpected races caused by taking a page refcount.
 	 */
-	if (!HWPoisonHandlable(head))
+	if (!HWPoisonHandlable(head, flags))
 		return -EBUSY;
 
 	if (get_page_unless_zero(head)) {
@@ -1222,7 +1228,7 @@  static int get_any_page(struct page *p, unsigned long flags)
 
 try_again:
 	if (!count_increased) {
-		ret = __get_hwpoison_page(p);
+		ret = __get_hwpoison_page(p, flags);
 		if (!ret) {
 			if (page_count(p)) {
 				/* We raced with an allocation, retry. */
@@ -1250,7 +1256,7 @@  static int get_any_page(struct page *p, unsigned long flags)
 		}
 	}
 
-	if (PageHuge(p) || HWPoisonHandlable(p)) {
+	if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
 		ret = 1;
 	} else {
 		/*
@@ -2308,7 +2314,7 @@  int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
 	get_online_mems();
-	ret = get_hwpoison_page(page, flags);
+	ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
 	put_online_mems();
 
 	if (ret > 0) {