[RFC,v2,13/16] mm,hwpoison: Take pages off the buddy when hard-offlining
diff mbox series

Message ID 20191017142123.24245-14-osalvador@suse.de
State New
Headers show
Series
  • Hwpoison rework {hard,soft}-offline
Related show

Commit Message

Oscar Salvador Oct. 17, 2019, 2:21 p.m. UTC
We need to do as we do now for soft-offline, and take poisoned pages
off the buddy allocator.
Otherwise we could face [1] as well.

[1] https://lore.kernel.org/linux-mm/20190826104144.GA7849@linux/T/#u

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 mm/memory-failure.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

Patch
diff mbox series

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 48eb314598e0..3d491c0d3f91 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -791,6 +791,14 @@  static int me_swapcache_clean(struct page *p, unsigned long pfn)
 		return MF_FAILED;
 }
 
+static int me_huge_free_page(struct page *p)
+{
+	if (page_handle_poison(p, true, false))
+		return MF_RECOVERED;
+	else
+		return MF_FAILED;
+}
+
 /*
  * Huge pages. Needs work.
  * Issues:
@@ -818,8 +826,7 @@  static int me_huge_page(struct page *p, unsigned long pfn)
 		 */
 		if (PageAnon(hpage))
 			put_page(hpage);
-		dissolve_free_huge_page(p);
-		res = MF_RECOVERED;
+		res = me_huge_free_page(p);
 		lock_page(hpage);
 	}
 
@@ -1145,8 +1152,10 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 			}
 		}
 		unlock_page(head);
-		dissolve_free_huge_page(p);
-		action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+		res = me_huge_free_page(p);
+		if (res == MF_FAILED)
+			num_poisoned_pages_dec();
+		action_result(pfn, MF_MSG_FREE_HUGE, res);
 		return 0;
 	}
 
@@ -1307,6 +1316,12 @@  int memory_failure(unsigned long pfn, int flags)
 
 	if (PageHuge(p))
 		return memory_failure_hugetlb(pfn, flags);
+
+	if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+		action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
+		return 0;
+	}
+
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
@@ -1328,10 +1343,10 @@  int memory_failure(unsigned long pfn, int flags)
 	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
 	 */
 	if (!get_hwpoison_page(p)) {
-		if (is_free_buddy_page(p)) {
-			action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
+		if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+			action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
 			return 0;
-		} else {
+		} else if(!is_free_buddy_page(p)) {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
 			return -EBUSY;
 		}
@@ -1354,8 +1369,8 @@  int memory_failure(unsigned long pfn, int flags)
 	 */
 	shake_page(p, 0);
 	/* shake_page could have turned it free. */
-	if (!PageLRU(p) && is_free_buddy_page(p)) {
-		action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
+	if (!PageLRU(p) && is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+		action_result(pfn, MF_MSG_BUDDY_2ND, MF_RECOVERED);
 		return 0;
 	}