diff mbox series

[mm-unstable,v5,4/8] mm, hwpoison: make unpoison aware of raw error info in hwpoisoned hugepage

Message ID 20220708053653.964464-5-naoya.horiguchi@linux.dev (mailing list archive)
State New
Headers show
Series mm, hwpoison: enable 1GB hugepage support (v5) | expand

Commit Message

Naoya Horiguchi July 8, 2022, 5:36 a.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

The raw error info list needs to be removed when a hwpoisoned hugetlb
page is unpoisoned, and the unpoison handler needs to know how many
errors there are in the target hugepage.  So add both.

Hugepages with HPageVmemmapOptimized(hpage) or HPageRawHwpUnreliable(hpage)
set can't be unpoisoned, so let's skip them.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: kernel test robot <lkp@intel.com>
---
v4 -> v5:
- fix type of return value of free_raw_hwp_pages()
  (found by kernel test robot),
- prevent unpoison for HPageVmemmapOptimized and HPageRawHwpUnreliable.
---
 include/linux/swapops.h |  9 ++++++++
 mm/memory-failure.c     | 50 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 53 insertions(+), 6 deletions(-)

Comments

Miaohe Lin July 11, 2022, 7:09 a.m. UTC | #1
On 2022/7/8 13:36, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> Raw error info list needs to be removed when hwpoisoned hugetlb is
> unpoisoned.  And unpoison handler needs to know how many errors there
> are in the target hugepage. So add them.
> 
> HPageVmemmapOptimized(hpage) and HPageRawHwpUnreliable(hpage)) can't be
> unpoisoned, so let's skip them.
> 
> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> Reported-by: kernel test robot <lkp@intel.com>
> ---
> v4 -> v5:
> - fix type of return value of free_raw_hwp_pages()
>   (found by kernel test robot),
> - prevent unpoison for HPageVmemmapOptimized and HPageRawHwpUnreliable.
> ---
>  include/linux/swapops.h |  9 ++++++++
>  mm/memory-failure.c     | 50 ++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 53 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index a01aeb3fcc0b..ddc98f96ad2c 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -498,6 +498,11 @@ static inline void num_poisoned_pages_dec(void)
>  	atomic_long_dec(&num_poisoned_pages);
>  }
>  
> +static inline void num_poisoned_pages_sub(long i)
> +{
> +	atomic_long_sub(i, &num_poisoned_pages);
> +}
> +
>  #else
>  
>  static inline swp_entry_t make_hwpoison_entry(struct page *page)
> @@ -518,6 +523,10 @@ static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
>  static inline void num_poisoned_pages_inc(void)
>  {
>  }
> +
> +static inline void num_poisoned_pages_sub(long i)
> +{
> +}
>  #endif
>  
>  static inline int non_swap_entry(swp_entry_t entry)
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 6833c5e4b410..89e74ec8a95f 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1720,22 +1720,41 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
>  	return ret;
>  }
>  
> -int hugetlb_clear_page_hwpoison(struct page *hpage)
> +static long free_raw_hwp_pages(struct page *hpage, bool move_flag)

No strong opinion: maybe the return type should be "unsigned", as it is always >= 0?

>  {
>  	struct llist_head *head;
>  	struct llist_node *t, *tnode;
> +	long count = 0;
>  
> -	if (!HPageRawHwpUnreliable(hpage))
> -		ClearPageHWPoison(hpage);
> +	/*
> +	 * HPageVmemmapOptimized hugepages can't be unpoisoned because
> +	 * struct pages for tail pages are required to free hwpoisoned
> +	 * hugepages.  HPageRawHwpUnreliable hugepages shouldn't be
> +	 * unpoisoned by definition.
> +	 */
> +	if (HPageVmemmapOptimized(hpage) || HPageRawHwpUnreliable(hpage))
> +		return 0;
>  	head = raw_hwp_list_head(hpage);
>  	llist_for_each_safe(tnode, t, head->first) {
>  		struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
>  
> -		SetPageHWPoison(p->page);
> +		if (move_flag)
> +			SetPageHWPoison(p->page);
>  		kfree(p);
> +		count++;
>  	}
>  	llist_del_all(head);
> -	return 0;
> +	return count;
> +}
> +
> +int hugetlb_clear_page_hwpoison(struct page *hpage)

It seems the return value is unused?

> +{
> +	int ret = -EBUSY;
> +
> +	if (!HPageRawHwpUnreliable(hpage))
> +		ret = !TestClearPageHWPoison(hpage);
> +	free_raw_hwp_pages(hpage, true);
> +	return ret;
>  }
>  
>  /*
> @@ -1879,6 +1898,10 @@ static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
>  	return 0;
>  }
>  
> +static inline long free_raw_hwp_pages(struct page *hpage, bool move_flag)

If return type is changed, remember to change here too.

> +{
> +	return 0;
> +}
>  #endif	/* CONFIG_HUGETLB_PAGE */
>  
>  static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
> @@ -2284,6 +2307,7 @@ int unpoison_memory(unsigned long pfn)
>  	struct page *p;
>  	int ret = -EBUSY;
>  	int freeit = 0;
> +	long count = 1;
>  	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
>  					DEFAULT_RATELIMIT_BURST);
>  
> @@ -2331,6 +2355,13 @@ int unpoison_memory(unsigned long pfn)
>  
>  	ret = get_hwpoison_page(p, MF_UNPOISON);
>  	if (!ret) {
> +		if (PageHuge(p)) {
> +			count = free_raw_hwp_pages(page, false);

It seems the current behavior is: if any subpage of a hugetlb page is unpoisoned, then all of the
hwpoisoned subpages will be unpoisoned. I'm not sure whether this is what we want.

Thanks.

> +			if (count == 0) {
> +				ret = -EBUSY;
> +				goto unlock_mutex;
> +			}
> +		}
>  		ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
>  	} else if (ret < 0) {
>  		if (ret == -EHWPOISON) {
> @@ -2339,6 +2370,13 @@ int unpoison_memory(unsigned long pfn)
>  			unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
>  					 pfn, &unpoison_rs);
>  	} else {
> +		if (PageHuge(p)) {
> +			count = free_raw_hwp_pages(page, false);
> +			if (count == 0) {
> +				ret = -EBUSY;
> +				goto unlock_mutex;
> +			}
> +		}
>  		freeit = !!TestClearPageHWPoison(p);
>  
>  		put_page(page);
> @@ -2351,7 +2389,7 @@ int unpoison_memory(unsigned long pfn)
>  unlock_mutex:
>  	mutex_unlock(&mf_mutex);
>  	if (!ret || freeit) {
> -		num_poisoned_pages_dec();
> +		num_poisoned_pages_sub(count);
>  		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
>  				 page_to_pfn(p), &unpoison_rs);
>  	}
>
HORIGUCHI NAOYA(堀口 直也) July 11, 2022, 9:24 a.m. UTC | #2
On Mon, Jul 11, 2022 at 03:09:01PM +0800, Miaohe Lin wrote:
> On 2022/7/8 13:36, Naoya Horiguchi wrote:
> > From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > 
> > Raw error info list needs to be removed when hwpoisoned hugetlb is
> > unpoisoned.  And unpoison handler needs to know how many errors there
> > are in the target hugepage. So add them.
> > 
> > HPageVmemmapOptimized(hpage) and HPageRawHwpUnreliable(hpage)) can't be
> > unpoisoned, so let's skip them.
> > 
> > Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > Reported-by: kernel test robot <lkp@intel.com>
> > ---
> > v4 -> v5:
> > - fix type of return value of free_raw_hwp_pages()
> >   (found by kernel test robot),
> > - prevent unpoison for HPageVmemmapOptimized and HPageRawHwpUnreliable.
> > ---
...
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index 6833c5e4b410..89e74ec8a95f 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1720,22 +1720,41 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
> >  	return ret;
> >  }
> >  
> > -int hugetlb_clear_page_hwpoison(struct page *hpage)
> > +static long free_raw_hwp_pages(struct page *hpage, bool move_flag)
> 
> NO strong opinion: Maybe the return type should be "unsigned" as it always >= 0 ?

Yes, will update.

> 
> >  {
> >  	struct llist_head *head;
> >  	struct llist_node *t, *tnode;
> > +	long count = 0;
> >  
> > -	if (!HPageRawHwpUnreliable(hpage))
> > -		ClearPageHWPoison(hpage);
> > +	/*
> > +	 * HPageVmemmapOptimized hugepages can't be unpoisoned because
> > +	 * struct pages for tail pages are required to free hwpoisoned
> > +	 * hugepages.  HPageRawHwpUnreliable hugepages shouldn't be
> > +	 * unpoisoned by definition.
> > +	 */
> > +	if (HPageVmemmapOptimized(hpage) || HPageRawHwpUnreliable(hpage))
> > +		return 0;
> >  	head = raw_hwp_list_head(hpage);
> >  	llist_for_each_safe(tnode, t, head->first) {
> >  		struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
> >  
> > -		SetPageHWPoison(p->page);
> > +		if (move_flag)
> > +			SetPageHWPoison(p->page);
> >  		kfree(p);
> > +		count++;
> >  	}
> >  	llist_del_all(head);
> > -	return 0;
> > +	return count;
> > +}
> > +
> > +int hugetlb_clear_page_hwpoison(struct page *hpage)
> 
> It seems the return value is unused?

Yes, the return value is not needed now.

> 
> > +{
> > +	int ret = -EBUSY;
> > +
> > +	if (!HPageRawHwpUnreliable(hpage))
> > +		ret = !TestClearPageHWPoison(hpage);
> > +	free_raw_hwp_pages(hpage, true);
> > +	return ret;
> >  }
> >  
> >  /*
> > @@ -1879,6 +1898,10 @@ static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
> >  	return 0;
> >  }
> >  
> > +static inline long free_raw_hwp_pages(struct page *hpage, bool move_flag)
> 
> If return type is changed, remember to change here too.

OK.

> > +{
> > +	return 0;
> > +}
> >  #endif	/* CONFIG_HUGETLB_PAGE */
> >  
> >  static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
> > @@ -2284,6 +2307,7 @@ int unpoison_memory(unsigned long pfn)
> >  	struct page *p;
> >  	int ret = -EBUSY;
> >  	int freeit = 0;
> > +	long count = 1;
> >  	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
> >  					DEFAULT_RATELIMIT_BURST);
> >  
> > @@ -2331,6 +2355,13 @@ int unpoison_memory(unsigned long pfn)
> >  
> >  	ret = get_hwpoison_page(p, MF_UNPOISON);
> >  	if (!ret) {
> > +		if (PageHuge(p)) {
> > +			count = free_raw_hwp_pages(page, false);
> 
> It seems the current behavior is: if any subpage of a hugetlb page is unpoisoned, then all of the
> hwpoisoned subpages will be unpoisoned. I'm not sure whether this is what we want.

Basically raw_hwp_info is not available to userspace (it might be recorded
in dmesg but is not available via /proc/kpageflags), so unpoisoning error
subpages one-by-one is sometimes bothersome.  If someone would like to
unpoison one-by-one (I expect nobody would), I can do this.

> Thanks.

Thank you!

- Naoya Horiguchi
Miaohe Lin July 11, 2022, 11:13 a.m. UTC | #3
On 2022/7/11 17:24, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Mon, Jul 11, 2022 at 03:09:01PM +0800, Miaohe Lin wrote:
...
> 
> Basically raw_hwp_info is not available to userspace (it might be recorded
> in dmesg but not available via /proc/kpageflags), so unpoisoning error
> subpages one-by-one is sometimes bothering.  If someone would like to
> unpoison one-by-one (I expect nobody would), I can do this.

I see. Many thanks for explanation.

> 
>> Thanks.
> 
> Thank you!
> 
> - Naoya Horiguchi
>
diff mbox series

Patch

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index a01aeb3fcc0b..ddc98f96ad2c 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -498,6 +498,11 @@  static inline void num_poisoned_pages_dec(void)
 	atomic_long_dec(&num_poisoned_pages);
 }
 
+static inline void num_poisoned_pages_sub(long i)
+{
+	atomic_long_sub(i, &num_poisoned_pages);
+}
+
 #else
 
 static inline swp_entry_t make_hwpoison_entry(struct page *page)
@@ -518,6 +523,10 @@  static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
 static inline void num_poisoned_pages_inc(void)
 {
 }
+
+static inline void num_poisoned_pages_sub(long i)
+{
+}
 #endif
 
 static inline int non_swap_entry(swp_entry_t entry)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6833c5e4b410..89e74ec8a95f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1720,22 +1720,41 @@  static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
 	return ret;
 }
 
-int hugetlb_clear_page_hwpoison(struct page *hpage)
+static long free_raw_hwp_pages(struct page *hpage, bool move_flag)
 {
 	struct llist_head *head;
 	struct llist_node *t, *tnode;
+	long count = 0;
 
-	if (!HPageRawHwpUnreliable(hpage))
-		ClearPageHWPoison(hpage);
+	/*
+	 * HPageVmemmapOptimized hugepages can't be unpoisoned because
+	 * struct pages for tail pages are required to free hwpoisoned
+	 * hugepages.  HPageRawHwpUnreliable hugepages shouldn't be
+	 * unpoisoned by definition.
+	 */
+	if (HPageVmemmapOptimized(hpage) || HPageRawHwpUnreliable(hpage))
+		return 0;
 	head = raw_hwp_list_head(hpage);
 	llist_for_each_safe(tnode, t, head->first) {
 		struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
 
-		SetPageHWPoison(p->page);
+		if (move_flag)
+			SetPageHWPoison(p->page);
 		kfree(p);
+		count++;
 	}
 	llist_del_all(head);
-	return 0;
+	return count;
+}
+
+int hugetlb_clear_page_hwpoison(struct page *hpage)
+{
+	int ret = -EBUSY;
+
+	if (!HPageRawHwpUnreliable(hpage))
+		ret = !TestClearPageHWPoison(hpage);
+	free_raw_hwp_pages(hpage, true);
+	return ret;
 }
 
 /*
@@ -1879,6 +1898,10 @@  static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
 	return 0;
 }
 
+static inline long free_raw_hwp_pages(struct page *hpage, bool move_flag)
+{
+	return 0;
+}
 #endif	/* CONFIG_HUGETLB_PAGE */
 
 static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
@@ -2284,6 +2307,7 @@  int unpoison_memory(unsigned long pfn)
 	struct page *p;
 	int ret = -EBUSY;
 	int freeit = 0;
+	long count = 1;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -2331,6 +2355,13 @@  int unpoison_memory(unsigned long pfn)
 
 	ret = get_hwpoison_page(p, MF_UNPOISON);
 	if (!ret) {
+		if (PageHuge(p)) {
+			count = free_raw_hwp_pages(page, false);
+			if (count == 0) {
+				ret = -EBUSY;
+				goto unlock_mutex;
+			}
+		}
 		ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
 	} else if (ret < 0) {
 		if (ret == -EHWPOISON) {
@@ -2339,6 +2370,13 @@  int unpoison_memory(unsigned long pfn)
 			unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
 					 pfn, &unpoison_rs);
 	} else {
+		if (PageHuge(p)) {
+			count = free_raw_hwp_pages(page, false);
+			if (count == 0) {
+				ret = -EBUSY;
+				goto unlock_mutex;
+			}
+		}
 		freeit = !!TestClearPageHWPoison(p);
 
 		put_page(page);
@@ -2351,7 +2389,7 @@  int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
-		num_poisoned_pages_dec();
+		num_poisoned_pages_sub(count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
 	}