diff mbox series

[RFC,v2,1/8] ext4/xfs: add page refcount helper

Message ID 20210607204226.7743-2-alex.sierra@amd.com (mailing list archive)
State New, archived
Headers show
Series Support DEVICE_GENERIC memory in migrate_vma_* | expand

Commit Message

Sierra Guiza, Alejandro (Alex) June 7, 2021, 8:42 p.m. UTC
From: Ralph Campbell <rcampbell@nvidia.com>

There are several places where ZONE_DEVICE struct pages assume a reference
count == 1 means the page is idle and free. Instead of open coding this,
add a helper function to hide this detail.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
---
 fs/dax.c            |  4 ++--
 fs/ext4/inode.c     |  5 +----
 fs/xfs/xfs_file.c   |  4 +---
 include/linux/dax.h | 10 ++++++++++
 4 files changed, 14 insertions(+), 9 deletions(-)

Comments

Liam R. Howlett June 8, 2021, 12:29 a.m. UTC | #1
* Alex Sierra <alex.sierra@amd.com> [210607 16:43]:
> From: Ralph Campbell <rcampbell@nvidia.com>
> 
> There are several places where ZONE_DEVICE struct pages assume a reference
> count == 1 means the page is idle and free. Instead of open coding this,
> add a helper function to hide this detail.
> 
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
> ---
>  fs/dax.c            |  4 ++--
>  fs/ext4/inode.c     |  5 +----
>  fs/xfs/xfs_file.c   |  4 +---
>  include/linux/dax.h | 10 ++++++++++
>  4 files changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 26d5dcd2d69e..321f4ddc6643 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -358,7 +358,7 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
>  	for_each_mapped_pfn(entry, pfn) {
>  		struct page *page = pfn_to_page(pfn);
>  
> -		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
> +		WARN_ON_ONCE(trunc && !dax_layout_is_idle_page(page));
>  		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
>  		page->mapping = NULL;
>  		page->index = 0;
> @@ -372,7 +372,7 @@ static struct page *dax_busy_page(void *entry)
>  	for_each_mapped_pfn(entry, pfn) {
>  		struct page *page = pfn_to_page(pfn);
>  
> -		if (page_ref_count(page) > 1)
> +		if (!dax_layout_is_idle_page(page))
>  			return page;
>  	}
>  	return NULL;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index c173c8405856..9ee00186412f 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3972,10 +3972,7 @@ int ext4_break_layouts(struct inode *inode)
>  		if (!page)
>  			return 0;
>  
> -		error = ___wait_var_event(&page->_refcount,
> -				atomic_read(&page->_refcount) == 1,
> -				TASK_INTERRUPTIBLE, 0, 0,
> -				ext4_wait_dax_page(ei));
> +		error = dax_wait_page(ei, page, ext4_wait_dax_page);
>  	} while (error == 0);
>  
>  	return error;
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 5b0f93f73837..39565fe5f817 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -782,9 +782,7 @@ xfs_break_dax_layouts(
>  		return 0;
>  
>  	*retry = true;
> -	return ___wait_var_event(&page->_refcount,
> -			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
> -			0, 0, xfs_wait_dax_page(inode));
> +	return dax_wait_page(inode, page, xfs_wait_dax_page);
>  }
>  
>  int
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index b52f084aa643..8909a91cd381 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -243,6 +243,16 @@ static inline bool dax_mapping(struct address_space *mapping)
>  	return mapping->host && IS_DAX(mapping->host);
>  }
>  
> +static inline bool dax_layout_is_idle_page(struct page *page)
> +{
> +	return page_ref_count(page) == 1;
> +}

If this races with page_ref_count(page) == 0, then it will return false
that a page is idle when the page is being freed.  I don't know the code
well enough to say if this is an issue or not so please let me know.

For example:
!dax_layout_is_idle_page() will return true in dax_busy_page() above
when the count is 0 and return the page.

Maybe you are sure to have at least one reference when calling this?  It
might be worth adding a comment.

> +
> +#define dax_wait_page(_inode, _page, _wait_cb)				\
> +	___wait_var_event(&(_page)->_refcount,				\
> +		dax_layout_is_idle_page(_page),				\
> +		TASK_INTERRUPTIBLE, 0, 0, _wait_cb(_inode))
> +
>  #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
>  void hmem_register_device(int target_nid, struct resource *r);
>  #else
> -- 
> 2.17.1
> 
>
Matthew Wilcox June 8, 2021, 2:33 p.m. UTC | #2
On Tue, Jun 08, 2021 at 12:29:04AM +0000, Liam Howlett wrote:
> * Alex Sierra <alex.sierra@amd.com> [210607 16:43]:
> > From: Ralph Campbell <rcampbell@nvidia.com>
> > 
> > There are several places where ZONE_DEVICE struct pages assume a reference
> > count == 1 means the page is idle and free. Instead of open coding this,
> > add a helper function to hide this detail.
> > 
> > diff --git a/include/linux/dax.h b/include/linux/dax.h
> > index b52f084aa643..8909a91cd381 100644
> > --- a/include/linux/dax.h
> > +++ b/include/linux/dax.h
> > @@ -243,6 +243,16 @@ static inline bool dax_mapping(struct address_space *mapping)
> >  	return mapping->host && IS_DAX(mapping->host);
> >  }
> >  
> > +static inline bool dax_layout_is_idle_page(struct page *page)
> > +{
> > +	return page_ref_count(page) == 1;
> > +}
> 
> If this races with page_ref_count(page) == 0, then it will return false
> that a page is idle when the page is being freed.  I don't know the code
> well enough to say if this is an issue or not so please let me know.
> 
> For example:
> !dax_layout_is_idle_page() will return true in dax_busy_page() above
> when the count is 0 and return the page.
> 
> Maybe you are sure to have at least one reference when calling this?  It
> might be worth adding a comment.

You're getting confused by the problem that the next patch fixes, which
is that devmap pages were stupidly given an elevated refcount.  devmap
pages are considered "free" when their refcount is 1.  See
put_page(), put_devmap_managed_page() and so on.
Matthew Wilcox June 9, 2021, 7:23 p.m. UTC | #3
On Mon, Jun 07, 2021 at 03:42:19PM -0500, Alex Sierra wrote:
> +++ b/include/linux/dax.h
> @@ -243,6 +243,16 @@ static inline bool dax_mapping(struct address_space *mapping)
>  	return mapping->host && IS_DAX(mapping->host);
>  }
>  
> +static inline bool dax_layout_is_idle_page(struct page *page)
> +{
> +	return page_ref_count(page) == 1;
> +}

We already have something called an idle page, and that's quite a
different thing from this.  How about dax_page_unused() (it's a use
count, so once it's got down to its minimum value, it's unused)?
Felix Kuehling June 14, 2021, 3:26 p.m. UTC | #4
Am 2021-06-09 um 3:23 p.m. schrieb Matthew Wilcox:
> On Mon, Jun 07, 2021 at 03:42:19PM -0500, Alex Sierra wrote:
>> +++ b/include/linux/dax.h
>> @@ -243,6 +243,16 @@ static inline bool dax_mapping(struct address_space *mapping)
>>  	return mapping->host && IS_DAX(mapping->host);
>>  }
>>  
>> +static inline bool dax_layout_is_idle_page(struct page *page)
>> +{
>> +	return page_ref_count(page) == 1;
>> +}
> We already have something called an idle page, and that's quite a
> different thing from this.  How about dax_page_unused() (it's a use
> count, so once it's got down to its minimum value, it's unused)?
>
Hi Matthew,

Thank you very much for your feedback. This patch looks straight-forward
enough, but do we need the filesystem maintainers to review this as
well? I guess we should CC the linux-ext4 and linux-xfs mailing lists in
the next revision.

Hi Ralph,

Are you OK if we update your patch with this suggestion?

Thanks,
  Felix
diff mbox series

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 26d5dcd2d69e..321f4ddc6643 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -358,7 +358,7 @@  static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 	for_each_mapped_pfn(entry, pfn) {
 		struct page *page = pfn_to_page(pfn);
 
-		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+		WARN_ON_ONCE(trunc && !dax_layout_is_idle_page(page));
 		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
 		page->mapping = NULL;
 		page->index = 0;
@@ -372,7 +372,7 @@  static struct page *dax_busy_page(void *entry)
 	for_each_mapped_pfn(entry, pfn) {
 		struct page *page = pfn_to_page(pfn);
 
-		if (page_ref_count(page) > 1)
+		if (!dax_layout_is_idle_page(page))
 			return page;
 	}
 	return NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c173c8405856..9ee00186412f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3972,10 +3972,7 @@  int ext4_break_layouts(struct inode *inode)
 		if (!page)
 			return 0;
 
-		error = ___wait_var_event(&page->_refcount,
-				atomic_read(&page->_refcount) == 1,
-				TASK_INTERRUPTIBLE, 0, 0,
-				ext4_wait_dax_page(ei));
+		error = dax_wait_page(ei, page, ext4_wait_dax_page);
 	} while (error == 0);
 
 	return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5b0f93f73837..39565fe5f817 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -782,9 +782,7 @@  xfs_break_dax_layouts(
 		return 0;
 
 	*retry = true;
-	return ___wait_var_event(&page->_refcount,
-			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
-			0, 0, xfs_wait_dax_page(inode));
+	return dax_wait_page(inode, page, xfs_wait_dax_page);
 }
 
 int
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..8909a91cd381 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -243,6 +243,16 @@  static inline bool dax_mapping(struct address_space *mapping)
 	return mapping->host && IS_DAX(mapping->host);
 }
 
+static inline bool dax_layout_is_idle_page(struct page *page)
+{
+	return page_ref_count(page) == 1;
+}
+
+#define dax_wait_page(_inode, _page, _wait_cb)				\
+	___wait_var_event(&(_page)->_refcount,				\
+		dax_layout_is_idle_page(_page),				\
+		TASK_INTERRUPTIBLE, 0, 0, _wait_cb(_inode))
+
 #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
 void hmem_register_device(int target_nid, struct resource *r);
 #else