
mm: Convert DAX lock/unlock page to lock/unlock folio

Message ID: 20230822231314.349200-1-willy@infradead.org (mailing list archive)
State: Handled Elsewhere, archived
Series: mm: Convert DAX lock/unlock page to lock/unlock folio

Commit Message

Matthew Wilcox Aug. 22, 2023, 11:13 p.m. UTC
The one caller of DAX lock/unlock page already calls compound_head(),
so use page_folio() instead, then use a folio throughout the DAX code
to remove uses of page->mapping and page->index.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/dax.c            | 24 ++++++++++++------------
 include/linux/dax.h | 10 +++++-----
 mm/memory-failure.c | 19 +++++++------------
 3 files changed, 24 insertions(+), 29 deletions(-)
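
In short, the caller-side conversion (an abridged sketch of the
mm/memory-failure.c change below):

	/* Before: normalize to the head page by hand, then lock it. */
	page = compound_head(page);
	cookie = dax_lock_page(page);
	...
	dax_unlock_page(page, cookie);

	/* After: take the folio once and use it throughout. */
	struct folio *folio = page_folio(page);
	cookie = dax_lock_folio(folio);
	...
	dax_unlock_folio(folio, cookie);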

Comments

Naoya Horiguchi Aug. 23, 2023, 5:51 a.m. UTC | #1
On Wed, Aug 23, 2023 at 12:13:14AM +0100, Matthew Wilcox (Oracle) wrote:
> The one caller of DAX lock/unlock page already calls compound_head(),
> so use page_folio() instead, then use a folio throughout the DAX code
> to remove uses of page->mapping and page->index.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Looks like a straightforward replacement, so I found no issues.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Jane Chu Aug. 24, 2023, 6:24 p.m. UTC | #2
On 8/22/2023 4:13 PM, Matthew Wilcox (Oracle) wrote:
[..]
>   
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index a6c3af985554..b81d6eb4e6ff 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1717,16 +1717,11 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>   		struct dev_pagemap *pgmap)
>   {
>   	struct page *page = pfn_to_page(pfn);

Looks like the above line, that is, the 'page' pointer, is no longer needed.
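Something like this minimal, untested sketch would fold it away:

	/* Sketch: drop the separate 'page' local, take the folio directly. */
	struct folio *folio = page_folio(pfn_to_page(pfn));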

> +	struct folio *folio = page_folio(page);
>   	LIST_HEAD(to_kill);
>   	dax_entry_t cookie;
>   	int rc = 0;
>   
> -	/*
> -	 * Pages instantiated by device-dax (not filesystem-dax)
> -	 * may be compound pages.
> -	 */
> -	page = compound_head(page);
> -
>   	/*
>   	 * Prevent the inode from being freed while we are interrogating
>   	 * the address_space, typically this would be handled by
> @@ -1734,11 +1729,11 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>   	 * also prevents changes to the mapping of this pfn until
>   	 * poison signaling is complete.
>   	 */
> -	cookie = dax_lock_page(page);
> +	cookie = dax_lock_folio(folio);
>   	if (!cookie)
>   		return -EBUSY;
>   
> -	if (hwpoison_filter(page)) {
> +	if (hwpoison_filter(&folio->page)) {
>   		rc = -EOPNOTSUPP;
>   		goto unlock;
>   	}
> @@ -1760,7 +1755,7 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>   	 * Use this flag as an indication that the dax page has been
>   	 * remapped UC to prevent speculative consumption of poison.
>   	 */
> -	SetPageHWPoison(page);
> +	SetPageHWPoison(&folio->page);
>   
>   	/*
>   	 * Unlike System-RAM there is no possibility to swap in a
> @@ -1769,11 +1764,11 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>   	 * SIGBUS (i.e. MF_MUST_KILL)
>   	 */
>   	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
> -	collect_procs(page, &to_kill, true);
> +	collect_procs(&folio->page, &to_kill, true);
>   
> -	unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
> +	unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags);
>   unlock:
> -	dax_unlock_page(page, cookie);
> +	dax_unlock_folio(folio, cookie);
>   	return rc;
>   }
>  


thanks,
-jane

Matthew Wilcox Aug. 24, 2023, 7:09 p.m. UTC | #3
On Thu, Aug 24, 2023 at 11:24:20AM -0700, Jane Chu wrote:
> 
> On 8/22/2023 4:13 PM, Matthew Wilcox (Oracle) wrote:
> [..]
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index a6c3af985554..b81d6eb4e6ff 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1717,16 +1717,11 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
> >   		struct dev_pagemap *pgmap)
> >   {
> >   	struct page *page = pfn_to_page(pfn);
> 
> Looks like the above line, that is, the 'page' pointer, is no longer needed.

So ...

It seems to me that hwpoison for DAX memory is currently handled on
a per-allocation basis, but it should probably be handled on a
per-page basis eventually?

If so, we'd want to do something like this ...

+++ b/mm/memory-failure.c
@@ -1755,7 +1755,9 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
         * Use this flag as an indication that the dax page has been
         * remapped UC to prevent speculative consumption of poison.
         */
-       SetPageHWPoison(&folio->page);
+       SetPageHWPoison(page);
+       if (folio_test_large(folio))
+               folio_set_has_hwpoisoned(folio);

        /*
         * Unlike System-RAM there is no possibility to swap in a
@@ -1766,7 +1768,8 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
        flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
        collect_procs(&folio->page, &to_kill, true);

-       unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags);
+       unmap_and_kill(&to_kill, pfn, folio->mapping,
+                       folio->index + folio_page_idx(folio, page), flags);
 unlock:
        dax_unlock_folio(folio, cookie);
        return rc;

But this is a change in current behaviour and I didn't want to think
through the implications of all of this.  Would you like to take on this
project?  ;-)


My vague plan for hwpoison in the memdesc world is that poison is always
handled on a per-page basis (by means of setting page->memdesc to a
hwpoison data structure).  If the allocation contains multiple pages,
then we set a flag somewhere like the current has_hwpoisoned flag.
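
(A purely hypothetical sketch of that shape; none of these names exist
in the kernel today:)

	/* Hypothetical only: page->memdesc as a tagged pointer means a
	 * single poisoned page can be retyped to point at a hwpoison
	 * descriptor while the rest of the allocation stays intact. */
	struct hwpoison {			/* hypothetical type */
		unsigned long old_memdesc;	/* descriptor the page had before */
	};

	page->memdesc = (unsigned long)hwp | MEMDESC_HWPOISON;	/* hypothetical tag */
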
Jane Chu Aug. 24, 2023, 9:30 p.m. UTC | #4
On 8/24/2023 12:09 PM, Matthew Wilcox wrote:
> On Thu, Aug 24, 2023 at 11:24:20AM -0700, Jane Chu wrote:
>>
>> On 8/22/2023 4:13 PM, Matthew Wilcox (Oracle) wrote:
>> [..]
>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>> index a6c3af985554..b81d6eb4e6ff 100644
>>> --- a/mm/memory-failure.c
>>> +++ b/mm/memory-failure.c
>>> @@ -1717,16 +1717,11 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>>>    		struct dev_pagemap *pgmap)
>>>    {
>>>    	struct page *page = pfn_to_page(pfn);
>>
>> Looks like the above line, that is, the 'page' pointer, is no longer needed.
> 
> So ...
> 
> It seems to me that hwpoison for DAX memory is currently handled on
> a per-allocation basis, but it should probably be handled on a
> per-page basis eventually?

My recollection is that since the inception of
memory_failure_dev_pagemap(), dax poison handling has been on a
per-page basis, in that .si_addr points to the subpage vaddr, though
.si_lsb indicates the user mapping size.
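
(Roughly how that reaches userspace today; abridged from kill_proc()
in mm/memory-failure.c:)

	/* si_addr carries the subpage's user virtual address; si_addr_lsb
	 * carries the mapping size (tk->size_shift, e.g. PMD_SHIFT for a
	 * 2MB DAX mapping). */
	if (flags & MF_ACTION_REQUIRED) {
		if (tsk->mm == current->mm)
			ret = force_sig_mceerr(BUS_MCEERR_AR,
					(void __user *)tk->addr, addr_lsb);
	} else {
		ret = send_sig_mceerr(BUS_MCEERR_AO,
				(void __user *)tk->addr, addr_lsb, t);
	}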

> 
> If so, we'd want to do something like this ...
> 
> +++ b/mm/memory-failure.c
> @@ -1755,7 +1755,9 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>           * Use this flag as an indication that the dax page has been
>           * remapped UC to prevent speculative consumption of poison.
>           */
> -       SetPageHWPoison(&folio->page);
> +       SetPageHWPoison(page);
> +       if (folio_test_large(folio))
> +               folio_set_has_hwpoisoned(folio);
> 
>          /*
>           * Unlike System-RAM there is no possibility to swap in a
> @@ -1766,7 +1768,8 @@ static int mf_generic_kill_procs(unsigned long pfn, int flags,
>          flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
>          collect_procs(&folio->page, &to_kill, true);
> 
> -       unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags);
> +       unmap_and_kill(&to_kill, pfn, folio->mapping,
> +                       folio->index + folio_page_idx(folio, page), flags);
>   unlock:
>          dax_unlock_folio(folio, cookie);
>          return rc;
> 

This change makes sense to me.
mf_generic_kill_procs() is the generic version, used when the DAX
filesystem does not register/provide
dax_dev->holder_ops->notify_failure. Currently only DAX-XFS does the
registration and uses the specific version, mf_dax_kill_procs().
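
(For reference, roughly how XFS opts in; abridged from fs/xfs:)

	static const struct dax_holder_operations xfs_dax_holder_operations = {
		.notify_failure		= xfs_dax_notify_failure,
	};

	/* Passing holder ops while acquiring the dax_device routes memory
	 * failures to xfs_dax_notify_failure(), which ends up calling
	 * mf_dax_kill_procs() instead of the generic path. */
	btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev,
			&btp->bt_dax_part_off, mp, &xfs_dax_holder_operations);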

> But this is a change in current behaviour and I didn't want to think
> through the implications of all of this.  Would you like to take on this
> project?  ;-)
> 
Sure, be happy to.

> 
> My vague plan for hwpoison in the memdesc world is that poison is always
> handled on a per-page basis (by means of setting page->memdesc to a
> hwpoison data structure).  If the allocation contains multiple pages,
> then we set a flag somewhere like the current has_hwpoisoned flag.

Could you elaborate on "by means of setting page->memdesc to a hwpoison 
data structure" please?

As for the PG_has_hwpoisoned flag, I see one reference for now -
$ git grep folio_test_has_hwpoisoned
mm/shmem.c:                 folio_test_has_hwpoisoned(folio)))
$ git grep folio_clear_has_hwpoisoned
<none>

A dax thp is a thp that is potentially recoverable from hwpoison(s).
If a dax thp has multiple hwpoisons, we can call
folio_clear_has_hwpoisoned() only once the last poisoned subpage has
been recovered. This implies keeping track of the number of poisoned
subpages per thp somehow; any thoughts on the best thing to do?
Hmm, maybe add a field in pgmap? Or add a query to the driver to
return whether there is any hwpoison in a given range?
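
(One hypothetical shape of the pgmap idea, sketch only; a
'nr_poisoned' field does not exist today:)

	/* Hypothetical: count poisoned subpages per pgmap so the folio
	 * flag can be cleared exactly when the last one is recovered. */
	atomic_inc(&pgmap->nr_poisoned);	/* when a subpage is poisoned */
	...
	if (atomic_dec_and_test(&pgmap->nr_poisoned))	/* on recovery */
		folio_clear_has_hwpoisoned(folio);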

thanks!
-jane

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 8fafecbe42b1..3380b43cb6bb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -412,23 +412,23 @@  static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
-/*
- * dax_lock_page - Lock the DAX entry corresponding to a page
- * @page: The page whose entry we want to lock
+/**
+ * dax_lock_folio - Lock the DAX entry corresponding to a folio
+ * @folio: The folio whose entry we want to lock
  *
  * Context: Process context.
- * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * Return: A cookie to pass to dax_unlock_folio() or 0 if the entry could
  * not be locked.
  */
-dax_entry_t dax_lock_page(struct page *page)
+dax_entry_t dax_lock_folio(struct folio *folio)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
 
-	/* Ensure page->mapping isn't freed while we look at it */
+	/* Ensure folio->mapping isn't freed while we look at it */
 	rcu_read_lock();
 	for (;;) {
-		struct address_space *mapping = READ_ONCE(page->mapping);
+		struct address_space *mapping = READ_ONCE(folio->mapping);
 
 		entry = NULL;
 		if (!mapping || !dax_mapping(mapping))
@@ -447,11 +447,11 @@  dax_entry_t dax_lock_page(struct page *page)
 
 		xas.xa = &mapping->i_pages;
 		xas_lock_irq(&xas);
-		if (mapping != page->mapping) {
+		if (mapping != folio->mapping) {
 			xas_unlock_irq(&xas);
 			continue;
 		}
-		xas_set(&xas, page->index);
+		xas_set(&xas, folio->index);
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
 			rcu_read_unlock();
@@ -467,10 +467,10 @@  dax_entry_t dax_lock_page(struct page *page)
 	return (dax_entry_t)entry;
 }
 
-void dax_unlock_page(struct page *page, dax_entry_t cookie)
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
 {
-	struct address_space *mapping = page->mapping;
-	XA_STATE(xas, &mapping->i_pages, page->index);
+	struct address_space *mapping = folio->mapping;
+	XA_STATE(xas, &mapping->i_pages, folio->index);
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 22cd9902345d..b463502b16e1 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -159,8 +159,8 @@  int dax_writeback_mapping_range(struct address_space *mapping,
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
 struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-dax_entry_t dax_lock_page(struct page *page);
-void dax_unlock_page(struct page *page, dax_entry_t cookie);
+dax_entry_t dax_lock_folio(struct folio *folio);
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
 dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
 		unsigned long index, struct page **page);
 void dax_unlock_mapping_entry(struct address_space *mapping,
@@ -182,14 +182,14 @@  static inline int dax_writeback_mapping_range(struct address_space *mapping,
 	return -EOPNOTSUPP;
 }
 
-static inline dax_entry_t dax_lock_page(struct page *page)
+static inline dax_entry_t dax_lock_folio(struct folio *folio)
 {
-	if (IS_DAX(page->mapping->host))
+	if (IS_DAX(folio->mapping->host))
 		return ~0UL;
 	return 0;
 }
 
-static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
+static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
 {
 }
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a6c3af985554..b81d6eb4e6ff 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1717,16 +1717,11 @@  static int mf_generic_kill_procs(unsigned long pfn, int flags,
 		struct dev_pagemap *pgmap)
 {
 	struct page *page = pfn_to_page(pfn);
+	struct folio *folio = page_folio(page);
 	LIST_HEAD(to_kill);
 	dax_entry_t cookie;
 	int rc = 0;
 
-	/*
-	 * Pages instantiated by device-dax (not filesystem-dax)
-	 * may be compound pages.
-	 */
-	page = compound_head(page);
-
 	/*
 	 * Prevent the inode from being freed while we are interrogating
 	 * the address_space, typically this would be handled by
@@ -1734,11 +1729,11 @@  static int mf_generic_kill_procs(unsigned long pfn, int flags,
 	 * also prevents changes to the mapping of this pfn until
 	 * poison signaling is complete.
 	 */
-	cookie = dax_lock_page(page);
+	cookie = dax_lock_folio(folio);
 	if (!cookie)
 		return -EBUSY;
 
-	if (hwpoison_filter(page)) {
+	if (hwpoison_filter(&folio->page)) {
 		rc = -EOPNOTSUPP;
 		goto unlock;
 	}
@@ -1760,7 +1755,7 @@  static int mf_generic_kill_procs(unsigned long pfn, int flags,
 	 * Use this flag as an indication that the dax page has been
 	 * remapped UC to prevent speculative consumption of poison.
 	 */
-	SetPageHWPoison(page);
+	SetPageHWPoison(&folio->page);
 
 	/*
 	 * Unlike System-RAM there is no possibility to swap in a
@@ -1769,11 +1764,11 @@  static int mf_generic_kill_procs(unsigned long pfn, int flags,
 	 * SIGBUS (i.e. MF_MUST_KILL)
 	 */
 	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
-	collect_procs(page, &to_kill, true);
+	collect_procs(&folio->page, &to_kill, true);
 
-	unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
+	unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags);
 unlock:
-	dax_unlock_page(page, cookie);
+	dax_unlock_folio(folio, cookie);
 	return rc;
 }