[v6,1/2] mm,hwpoison: fix race with hugetlb page allocation

Message ID 20210603233632.2964832-2-nao.horiguchi@gmail.com (mailing list archive)
State New
Series hwpoison: fix race with hugetlb page allocation

Commit Message

Naoya Horiguchi June 3, 2021, 11:36 p.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

When a hugetlb page fault (under an overcommitting situation) races with
memory_failure(), VM_BUG_ON_PAGE() is triggered by the following sequence:

    CPU0:                           CPU1:

                                    gather_surplus_pages()
                                      page = alloc_surplus_huge_page()
    memory_failure_hugetlb()
      get_hwpoison_page(page)
        __get_hwpoison_page(page)
          get_page_unless_zero(page)
                                      zero = put_page_testzero(page)
                                      VM_BUG_ON_PAGE(!zero, page)
                                      enqueue_huge_page(h, page)
      put_page(page)

__get_hwpoison_page() only checks the page refcount before taking an
additional one for memory error handling, which is not enough because
there's a time window where a compound page has a non-zero refcount during
hugetlb page initialization.
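
For illustration, here is a condensed sketch of that window in
gather_surplus_pages() (not verbatim kernel code; the argument names are
illustrative, but the calls match the diagram above):

    page = alloc_surplus_huge_page(h, gfp_mask, nid, NULL); /* refcount == 1 */
    ...
    /*
     * The page still carries its allocation reference here, so a
     * concurrent get_page_unless_zero() from __get_hwpoison_page()
     * succeeds and raises the refcount to 2.  put_page_testzero()
     * then returns false and the assertion below fires.
     */
    zero = put_page_testzero(page);
    VM_BUG_ON_PAGE(!zero, page);
    enqueue_huge_page(h, page);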

So make __get_hwpoison_page() check the page status more thoroughly for
hugetlb pages with get_hwpoison_huge_page(). Checking hugetlb-specific
flags under hugetlb_lock ensures that the hugetlb page is not in a
transient state. Notably, another new function, HWPoisonHandlable(),
helps prevent races against other transient page states (such as a
generic compound page just before PageHuge becomes true).
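
Condensed from the mm/memory-failure.c hunk below, the new entry checks in
__get_hwpoison_page() look like this:

    ret = get_hwpoison_huge_page(head, &hugetlb);
    if (hugetlb)                   /* hugetlb page: the result was decided */
        return ret;                /* under hugetlb_lock, so trust it      */

    /* For any other page type, refuse to take a refcount unless the
     * page is in a state that the hwpoison mechanism can handle. */
    if (!HWPoisonHandlable(head))
        return 0;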

Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: Muchun Song <songmuchun@bytedance.com>
Cc: stable@vger.kernel.org # 5.12+
---
ChangeLog v6:
- defined HWPoisonHandlable(),
- updated comment and patch description.
---
 include/linux/hugetlb.h |  6 ++++++
 mm/hugetlb.c            | 15 +++++++++++++++
 mm/memory-failure.c     | 29 +++++++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 2 deletions(-)

Comments

Mike Kravetz June 4, 2021, 11:55 p.m. UTC | #1
On 6/3/21 4:36 PM, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> When a hugetlb page fault (under an overcommitting situation) races with
> memory_failure(), VM_BUG_ON_PAGE() is triggered by the following sequence:
> 
>     CPU0:                           CPU1:
> 
>                                     gather_surplus_pages()
>                                       page = alloc_surplus_huge_page()
>     memory_failure_hugetlb()
>       get_hwpoison_page(page)
>         __get_hwpoison_page(page)
>           get_page_unless_zero(page)
>                                       zero = put_page_testzero(page)
>                                       VM_BUG_ON_PAGE(!zero, page)
>                                       enqueue_huge_page(h, page)
>       put_page(page)
> 
> __get_hwpoison_page() only checks the page refcount before taking an
> additional one for memory error handling, which is not enough because
> there's a time window where a compound page has a non-zero refcount during
> hugetlb page initialization.
> 
> So make __get_hwpoison_page() check the page status more thoroughly for
> hugetlb pages with get_hwpoison_huge_page(). Checking hugetlb-specific
> flags under hugetlb_lock ensures that the hugetlb page is not in a
> transient state. Notably, another new function, HWPoisonHandlable(),
> helps prevent races against other transient page states (such as a
> generic compound page just before PageHuge becomes true).

Thanks!

I believe this is good enough to prevent the race.  Since access to the
page is not synchronized in any way, it would still be "theoretically
possible" to race.  For example,

    CPU0:                           CPU1:

    HWPoisonHandlable true
                                    page freed to buddy
                                    page allocated from buddy
                                    page added to hugetlb pool
                                    alloc_surplus_huge_page

But, this is very Very VERY unlikely to ever happen.

> 
> Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling")
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> Reported-by: Muchun Song <songmuchun@bytedance.com>
> Cc: stable@vger.kernel.org # 5.12+
> ---
> ChangeLog v6:
> - defined HWPoisonHandlable(),
> - updated comment and patch description.
> ---
>  include/linux/hugetlb.h |  6 ++++++
>  mm/hugetlb.c            | 15 +++++++++++++++
>  mm/memory-failure.c     | 29 +++++++++++++++++++++++++++--
>  3 files changed, 48 insertions(+), 2 deletions(-)

Acked-by: Mike Kravetz <mike.kravetz@oracle.com>

Patch

diff --git v5.13-rc2/include/linux/hugetlb.h v5.13-rc2_patched/include/linux/hugetlb.h
index b92f25ccef58..790ae618548d 100644
--- v5.13-rc2/include/linux/hugetlb.h
+++ v5.13-rc2_patched/include/linux/hugetlb.h
@@ -149,6 +149,7 @@  bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@  static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 	return false;
 }
 
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+	return 0;
+}
+
 static inline void putback_active_hugepage(struct page *page)
 {
 }
diff --git v5.13-rc2/mm/hugetlb.c v5.13-rc2_patched/mm/hugetlb.c
index 470f7b5b437e..0c1d87b1e303 100644
--- v5.13-rc2/mm/hugetlb.c
+++ v5.13-rc2_patched/mm/hugetlb.c
@@ -5847,6 +5847,21 @@  bool isolate_huge_page(struct page *page, struct list_head *list)
 	return ret;
 }
 
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+	int ret = 0;
+
+	*hugetlb = false;
+	spin_lock_irq(&hugetlb_lock);
+	if (PageHeadHuge(page)) {
+		*hugetlb = true;
+		if (HPageFreed(page) || HPageMigratable(page))
+			ret = get_page_unless_zero(page);
+	}
+	spin_unlock_irq(&hugetlb_lock);
+	return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
 	spin_lock_irq(&hugetlb_lock);
diff --git v5.13-rc2/mm/memory-failure.c v5.13-rc2_patched/mm/memory-failure.c
index 86e9a2217716..0caedbeeed77 100644
--- v5.13-rc2/mm/memory-failure.c
+++ v5.13-rc2_patched/mm/memory-failure.c
@@ -1092,6 +1092,17 @@  static int page_action(struct page_state *ps, struct page *p,
 	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
+/*
+ * Return true if the type of a given page is supported by the hwpoison
+ * mechanism (though handling may still fail), otherwise false.  This
+ * function never returns true for hugetlb or device memory pages, so it
+ * is assumed to be called only in contexts where such pages never appear.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+	return PageLRU(page) || __PageMovable(page);
+}
+
 /**
  * __get_hwpoison_page() - Get refcount for memory error handling:
  * @page:	raw error page (hit by memory error)
@@ -1102,8 +1113,22 @@  static int page_action(struct page_state *ps, struct page *p,
 static int __get_hwpoison_page(struct page *page)
 {
 	struct page *head = compound_head(page);
+	int ret = 0;
+	bool hugetlb = false;
+
+	ret = get_hwpoison_huge_page(head, &hugetlb);
+	if (hugetlb)
+		return ret;
+
+	/*
+	 * This check prevents calling get_page_unless_zero() for any
+	 * unsupported type of page in order to reduce the risk of
+	 * unexpected races caused by taking a page refcount.
+	 */
+	if (!HWPoisonHandlable(head))
+		return 0;
 
-	if (!PageHuge(head) && PageTransHuge(head)) {
+	if (PageTransHuge(head)) {
 		/*
 		 * Non anonymous thp exists only in allocation/free time. We
 		 * can't handle such a case correctly, so let's give it up.
@@ -1160,7 +1185,7 @@  static int get_any_page(struct page *p, unsigned long flags)
 			ret = -EIO;
 		}
 	} else {
-		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+		if (PageHuge(p) || HWPoisonHandlable(p)) {
 			ret = 1;
 		} else {
 			/*