
[v2,2/4] mm/hwpoison: check if a subpage of a hugetlb folio is raw HWPOISON

Message ID 20230623164015.3431990-3-jiaqiyan@google.com (mailing list archive)
State New
Series Improve hugetlbfs read on HWPOISON hugepages

Commit Message

Jiaqi Yan June 23, 2023, 4:40 p.m. UTC
Add the functionality to tell if a subpage of a hugetlb folio is a
raw HWPOISON page. This functionality relies on RawHwpUnreliable not
being set; otherwise the hugepage's HWPOISON list becomes meaningless.

Export this functionality so it can be used immediately in the read
operation for hugetlbfs.

Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
 include/linux/hugetlb.h | 19 +++++++++++++++++++
 include/linux/mm.h      |  7 +++++++
 mm/hugetlb.c            | 10 ++++++++++
 mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
 4 files changed, 60 insertions(+), 10 deletions(-)

Comments

Mike Kravetz July 5, 2023, 11:57 p.m. UTC | #1
On 06/23/23 16:40, Jiaqi Yan wrote:
> Add the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable not
> being set; otherwise the hugepage's HWPOISON list becomes meaningless.
> 
> Export this functionality so it can be used immediately in the read
> operation for hugetlbfs.
> 
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> ---
>  include/linux/hugetlb.h | 19 +++++++++++++++++++
>  include/linux/mm.h      |  7 +++++++
>  mm/hugetlb.c            | 10 ++++++++++
>  mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
>  4 files changed, 60 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 21f942025fec..8b73a12b7b38 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
>  void hugetlb_unregister_node(struct node *node);
>  #endif
>  
> +/*
> + * Struct raw_hwp_page represents information about "raw error page",
> + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> + */
> +struct raw_hwp_page {
> +	struct llist_node node;
> +	struct page *page;
> +};
> +
> +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> +{
> +	return (struct llist_head *)&folio->_hugetlb_hwpoison;
> +}
> +
> +/*
> + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> + */
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> +
>  #else	/* CONFIG_HUGETLB_PAGE */
>  struct hstate {};
>  
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66032f0d515c..41a283bd41a7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
>  extern void memory_failure_queue(unsigned long pfn, int flags);
>  extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  					bool *migratable_cleared);
> +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
>  void num_poisoned_pages_inc(unsigned long pfn);
>  void num_poisoned_pages_sub(unsigned long pfn, long i);
>  struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  	return 0;
>  }
>  
> +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> +					struct page *subpage)
> +{
> +	return false;
> +}
> +
>  static inline void num_poisoned_pages_inc(unsigned long pfn)
>  {
>  }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ea24718db4af..6b860de87590 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  	return ret;
>  }
>  
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> +{
> +	bool ret;
> +
> +	spin_lock_irq(&hugetlb_lock);
> +	ret = __is_raw_hwp_subpage(folio, subpage);
> +	spin_unlock_irq(&hugetlb_lock);

Can you describe what races the hugetlb_lock prevents here?
Jiaqi Yan July 6, 2023, 6:25 p.m. UTC | #2
On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 06/23/23 16:40, Jiaqi Yan wrote:
> > Add the functionality to tell if a subpage of a hugetlb folio is a
> > raw HWPOISON page. This functionality relies on RawHwpUnreliable not
> > being set; otherwise the hugepage's HWPOISON list becomes meaningless.
> >
> > Export this functionality so it can be used immediately in the read
> > operation for hugetlbfs.
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> > ---
> >  include/linux/hugetlb.h | 19 +++++++++++++++++++
> >  include/linux/mm.h      |  7 +++++++
> >  mm/hugetlb.c            | 10 ++++++++++
> >  mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
> >  4 files changed, 60 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 21f942025fec..8b73a12b7b38 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
> >  void hugetlb_unregister_node(struct node *node);
> >  #endif
> >
> > +/*
> > + * Struct raw_hwp_page represents information about "raw error page",
> > + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > + */
> > +struct raw_hwp_page {
> > +     struct llist_node node;
> > +     struct page *page;
> > +};
> > +
> > +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +{
> > +     return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +}
> > +
> > +/*
> > + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> > + */
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> > +
> >  #else        /* CONFIG_HUGETLB_PAGE */
> >  struct hstate {};
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 66032f0d515c..41a283bd41a7 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
> >  extern void memory_failure_queue(unsigned long pfn, int flags);
> >  extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >                                       bool *migratable_cleared);
> > +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> >  void num_poisoned_pages_inc(unsigned long pfn);
> >  void num_poisoned_pages_sub(unsigned long pfn, long i);
> >  struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> > @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >       return 0;
> >  }
> >
> > +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> > +                                     struct page *subpage)
> > +{
> > +     return false;
> > +}
> > +
> >  static inline void num_poisoned_pages_inc(unsigned long pfn)
> >  {
> >  }
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ea24718db4af..6b860de87590 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >       return ret;
> >  }
> >
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > +{
> > +     bool ret;
> > +
> > +     spin_lock_irq(&hugetlb_lock);
> > +     ret = __is_raw_hwp_subpage(folio, subpage);
> > +     spin_unlock_irq(&hugetlb_lock);
>
> Can you describe what races the hugetlb_lock prevents here?

I think we should synchronize here with __get_huge_page_for_hwpoison,
which iterates over and inserts entries into the raw_hwp_list. llist
itself doesn't guarantee that insertion is synchronized with the
iteration done by __is_raw_hwp_subpage.
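
For reference, a trimmed sketch of the insertion side, modeled on
folio_set_hugetlb_hwpoison() in mm/memory-failure.c (simplified here,
not the exact upstream code). It runs with hugetlb_lock held by the
caller of __get_huge_page_for_hwpoison(), and both its list walk and
its llist_add() touch the same list that __is_raw_hwp_subpage()
iterates:

	static int folio_set_hugetlb_hwpoison(struct folio *folio,
					      struct page *page)
	{
		struct llist_head *head = raw_hwp_list_head(folio);
		struct raw_hwp_page *p, *next, *raw_hwp;

		/* Walk the list so one subpage is never recorded twice. */
		llist_for_each_entry_safe(p, next, head->first, node) {
			if (p->page == page)
				return -EHWPOISON;
		}

		raw_hwp = kmalloc(sizeof(*raw_hwp), GFP_ATOMIC);
		if (!raw_hwp)
			/* the real code marks the folio RawHwpUnreliable here */
			return -EHWPOISON;
		raw_hwp->page = page;
		/* Publish the new entry on the folio's raw_hwp list. */
		llist_add(&raw_hwp->node, head);
		return 0;
	}

Taking hugetlb_lock in is_raw_hwp_subpage() makes the reader's list
walk mutually exclusive with this insertion.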

> --
> Mike Kravetz
>
> > +     return ret;
> > +}
> > +
> >  void folio_putback_active_hugetlb(struct folio *folio)
> >  {
> >       spin_lock_irq(&hugetlb_lock);
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index c415c3c462a3..891248e2930e 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
> >  #endif /* CONFIG_FS_DAX */
> >
> >  #ifdef CONFIG_HUGETLB_PAGE
> > -/*
> > - * Struct raw_hwp_page represents information about "raw error page",
> > - * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > - */
> > -struct raw_hwp_page {
> > -     struct llist_node node;
> > -     struct page *page;
> > -};
> >
> > -static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> >  {
> > -     return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +     struct llist_head *raw_hwp_head;
> > +     struct raw_hwp_page *p, *tmp;
> > +     bool ret = false;
> > +
> > +     if (!folio_test_hwpoison(folio))
> > +             return false;
> > +
> > +     /*
> > +      * When RawHwpUnreliable is set, kernel lost track of which subpages
> > +      * are HWPOISON. So return as if ALL subpages are HWPOISONed.
> > +      */
> > +     if (folio_test_hugetlb_raw_hwp_unreliable(folio))
> > +             return true;
> > +
> > +     raw_hwp_head = raw_hwp_list_head(folio);
> > +     llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
> > +             if (subpage == p->page) {
> > +                     ret = true;
> > +                     break;
> > +             }
> > +     }
> > +
> > +     return ret;
> >  }
> >
> >  static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
> > --
> > 2.41.0.162.gfafddb0af9-goog
> >
Mike Kravetz July 6, 2023, 10:06 p.m. UTC | #3
On 07/06/23 11:25, Jiaqi Yan wrote:
> On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > On 06/23/23 16:40, Jiaqi Yan wrote:
> > >
> > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > +{
> > > +     bool ret;
> > > +
> > > +     spin_lock_irq(&hugetlb_lock);
> > > +     ret = __is_raw_hwp_subpage(folio, subpage);
> > > +     spin_unlock_irq(&hugetlb_lock);
> >
> > Can you describe what races the hugetlb_lock prevents here?
> 
> I think we should synchronize here with __get_huge_page_for_hwpoison,
> which iterates over and inserts entries into the raw_hwp_list. llist
> itself doesn't guarantee that insertion is synchronized with the
> iteration done by __is_raw_hwp_subpage.
> 

Ok, makes sense.  And, since this is only called on the file read path
when we encounter a PageHWPoison(page), the overhead of the lock cycles
is not a concern.

You can add,

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Naoya Horiguchi July 7, 2023, 1:06 a.m. UTC | #4
On Fri, Jun 23, 2023 at 04:40:13PM +0000, Jiaqi Yan wrote:
> Add the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable not
> being set; otherwise the hugepage's HWPOISON list becomes meaningless.
> 
> Export this functionality so it can be used immediately in the read
> operation for hugetlbfs.
> 
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>

Looks good to me, thank you.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Jiaqi Yan July 7, 2023, 1:27 a.m. UTC | #5
On Thu, Jul 6, 2023 at 3:06 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 07/06/23 11:25, Jiaqi Yan wrote:
> > On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > > On 06/23/23 16:40, Jiaqi Yan wrote:
> > > >
> > > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > > +{
> > > > +     bool ret;
> > > > +
> > > > +     spin_lock_irq(&hugetlb_lock);
> > > > +     ret = __is_raw_hwp_subpage(folio, subpage);
> > > > +     spin_unlock_irq(&hugetlb_lock);
> > >
> > > Can you describe what races the hugetlb_lock prevents here?
> >
> > I think we should synchronize here with __get_huge_page_for_hwpoison,
> > which iterates over and inserts entries into the raw_hwp_list. llist
> > itself doesn't guarantee that insertion is synchronized with the
> > iteration done by __is_raw_hwp_subpage.
> >
>
> Ok, makes sense.  And, since this is only called on the file read path
> when we encounter a PageHWPoison(page), the overhead of the lock cycles
> is not a concern.

Yes, thanks for pointing this out (which I forgot):
is_raw_hwp_subpage() is called after the PageHWPoison check in
hugetlbfs_read_iter. I think both this and the reason for holding the
lock are worth mentioning in the commit msg.
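
For reference, a rough sketch of the expected call site on the
hugetlbfs read path (hypothetical shape only; the variable names and
error handling are illustrative, not the exact code in patch 3/4):

	/* In the hugetlbfs_read_iter() copy loop: "page" is the
	 * subpage about to be copied out to userspace.
	 */
	page = folio_file_page(folio, index);
	if (PageHWPoison(page)) {
		/*
		 * Only fail the read for a raw error subpage; clean
		 * subpages of a poisoned hugepage remain readable.
		 */
		if (is_raw_hwp_subpage(folio, page)) {
			retval = -EIO;
			break;
		}
	}

The cheap PageHWPoison() test runs first, so the locked list walk in
is_raw_hwp_subpage() only happens on the rare poisoned-page path.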


>
> You can add,
>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> --
> Mike Kravetz

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 21f942025fec..8b73a12b7b38 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1013,6 +1013,25 @@  void hugetlb_register_node(struct node *node);
 void hugetlb_unregister_node(struct node *node);
 #endif
 
+/*
+ * Struct raw_hwp_page represents information about "raw error page",
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
+ */
+struct raw_hwp_page {
+	struct llist_node node;
+	struct page *page;
+};
+
+static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+{
+	return (struct llist_head *)&folio->_hugetlb_hwpoison;
+}
+
+/*
+ * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
+ */
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66032f0d515c..41a283bd41a7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3671,6 +3671,7 @@  extern const struct attribute_group memory_failure_attr_group;
 extern void memory_failure_queue(unsigned long pfn, int flags);
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
+extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
 void num_poisoned_pages_inc(unsigned long pfn);
 void num_poisoned_pages_sub(unsigned long pfn, long i);
 struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
@@ -3685,6 +3686,12 @@  static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 	return 0;
 }
 
+static inline bool __is_raw_hwp_subpage(struct folio *folio,
+					struct page *subpage)
+{
+	return false;
+}
+
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea24718db4af..6b860de87590 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7377,6 +7377,16 @@  int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 	return ret;
 }
 
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
+{
+	bool ret;
+
+	spin_lock_irq(&hugetlb_lock);
+	ret = __is_raw_hwp_subpage(folio, subpage);
+	spin_unlock_irq(&hugetlb_lock);
+	return ret;
+}
+
 void folio_putback_active_hugetlb(struct folio *folio)
 {
 	spin_lock_irq(&hugetlb_lock);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c415c3c462a3..891248e2930e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1809,18 +1809,32 @@  EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #endif /* CONFIG_FS_DAX */
 
 #ifdef CONFIG_HUGETLB_PAGE
-/*
- * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
- */
-struct raw_hwp_page {
-	struct llist_node node;
-	struct page *page;
-};
 
-static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
 {
-	return (struct llist_head *)&folio->_hugetlb_hwpoison;
+	struct llist_head *raw_hwp_head;
+	struct raw_hwp_page *p, *tmp;
+	bool ret = false;
+
+	if (!folio_test_hwpoison(folio))
+		return false;
+
+	/*
+	 * When RawHwpUnreliable is set, kernel lost track of which subpages
+	 * are HWPOISON. So return as if ALL subpages are HWPOISONed.
+	 */
+	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+		return true;
+
+	raw_hwp_head = raw_hwp_list_head(folio);
+	llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
+		if (subpage == p->page) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
 }
 
 static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)