
[v14,101/138] iomap: Convert iomap_page_mkwrite to use a folio

Message ID 20210715033704.692967-102-willy@infradead.org (mailing list archive)
State New
Series Memory folios

Commit Message

Matthew Wilcox July 15, 2021, 3:36 a.m. UTC
If we write to any page in a folio, we have to mark the entire
folio as dirty, and potentially COW the entire folio, because it'll
all get written back as one unit.
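
As a rough sketch of the effect (not part of this patch; it assumes vmf is the
write fault and uses the folio helpers introduced earlier in this series):

	struct folio *folio = page_folio(vmf->page);
	loff_t pos = folio_pos(folio);	/* start of the folio in the file */
	size_t len = folio_size(folio);	/* e.g. 2MiB for a PMD-sized folio */

	/* Dirtying any byte inside the folio dirties the folio as a whole,
	 * so writeback later covers all of [pos, pos + len), not just the
	 * single page that was actually written. */
	folio_mark_dirty(folio);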

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

Comments

Darrick J. Wong July 15, 2021, 9:41 p.m. UTC | #1
On Thu, Jul 15, 2021 at 04:36:27AM +0100, Matthew Wilcox (Oracle) wrote:
> If we write to any page in a folio, we have to mark the entire
> folio as dirty, and potentially COW the entire folio, because it'll
> all get written back as one unit.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 42 +++++++++++++++++++++---------------------
>  1 file changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 7c702d6c2f64..a3fe0d36c739 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -951,23 +951,23 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
>  }
>  EXPORT_SYMBOL_GPL(iomap_truncate_page);
>  
> -static loff_t
> -iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap, struct iomap *srcmap)
> +static loff_t iomap_folio_mkwrite_actor(struct inode *inode, loff_t pos,
> +		loff_t length, void *data, struct iomap *iomap,
> +		struct iomap *srcmap)
>  {
> -	struct page *page = data;
> -	struct folio *folio = page_folio(page);
> +	struct folio *folio = data;
>  	int ret;
>  
>  	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
> -		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
> +		ret = __block_write_begin_int(&folio->page, pos, length, NULL,
> +						iomap);
>  		if (ret)
>  			return ret;
> -		block_commit_write(page, 0, length);
> +		block_commit_write(&folio->page, 0, length);
>  	} else {
> -		WARN_ON_ONCE(!PageUptodate(page));
> +		WARN_ON_ONCE(!folio_test_uptodate(folio));
>  		iomap_page_create(inode, folio);
> -		set_page_dirty(page);
> +		folio_mark_dirty(folio);
>  	}
>  
>  	return length;
> @@ -975,33 +975,33 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
>  
>  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
>  {
> -	struct page *page = vmf->page;
> +	struct folio *folio = page_folio(vmf->page);

If before the page fault the folio was a compound 2M page, will the
memory manager have split it into 4k pages before passing it to us?

That's a roundabout way of asking if we should expect folio_mkwrite at
some point. ;)

The conversion looks pretty straightforward though.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D


>  	struct inode *inode = file_inode(vmf->vma->vm_file);
> -	unsigned long length;
> -	loff_t offset;
> +	size_t length;
> +	loff_t pos;
>  	ssize_t ret;
>  
> -	lock_page(page);
> -	ret = page_mkwrite_check_truncate(page, inode);
> +	folio_lock(folio);
> +	ret = folio_mkwrite_check_truncate(folio, inode);
>  	if (ret < 0)
>  		goto out_unlock;
>  	length = ret;
>  
> -	offset = page_offset(page);
> +	pos = folio_pos(folio);
>  	while (length > 0) {
> -		ret = iomap_apply(inode, offset, length,
> -				IOMAP_WRITE | IOMAP_FAULT, ops, page,
> -				iomap_page_mkwrite_actor);
> +		ret = iomap_apply(inode, pos, length,
> +				IOMAP_WRITE | IOMAP_FAULT, ops, folio,
> +				iomap_folio_mkwrite_actor);
>  		if (unlikely(ret <= 0))
>  			goto out_unlock;
> -		offset += ret;
> +		pos += ret;
>  		length -= ret;
>  	}
>  
> -	wait_for_stable_page(page);
> +	folio_wait_stable(folio);
>  	return VM_FAULT_LOCKED;
>  out_unlock:
> -	unlock_page(page);
> +	folio_unlock(folio);
>  	return block_page_mkwrite_return(ret);
>  }
>  EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
> -- 
> 2.30.2
>
Matthew Wilcox July 16, 2021, 3:18 a.m. UTC | #2
On Thu, Jul 15, 2021 at 02:41:06PM -0700, Darrick J. Wong wrote:
> > @@ -975,33 +975,33 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> >  
> >  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
> >  {
> > -	struct page *page = vmf->page;
> > +	struct folio *folio = page_folio(vmf->page);
> 
> If before the page fault the folio was a compound 2M page, will the
> memory manager have split it into 4k pages before passing it to us?
> 
> That's a roundabout way of asking if we should expect folio_mkwrite at
> some point. ;)

Faults are tricky.  For ->fault, we need to know the precise page which
the fault occurred on (this detail is handled for you by filemap_fault()).
For mkwrite(), the page will not be split, so it's going to be a matter
of just marking the entire compound page as dirty (in the head page)
and making sure the filesystem is able to write back the entire folio.
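
Roughly, for the mkwrite side only (a sketch using the helpers from this
series, not the full iomap path):

	/* vmf->page may be any page of the compound page; page_folio()
	 * always returns the head, and that is what we operate on. */
	struct folio *folio = page_folio(vmf->page);

	folio_lock(folio);
	WARN_ON_ONCE(!folio_test_uptodate(folio));
	folio_mark_dirty(folio);	/* dirty state lives in the head page */
	folio_unlock(folio);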

Yes, there's going to be some write amplification here.  I believe
this will turn out to be a worthwhile tradeoff.  If I'm wrong, we can
implement some kind of split-on-fault.
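For a PMD-sized folio the worst case is a single 4KiB store triggering
writeback of the full 2MiB, i.e. 512 pages written back for one page modified.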

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7c702d6c2f64..a3fe0d36c739 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -951,23 +951,23 @@  iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 }
 EXPORT_SYMBOL_GPL(iomap_truncate_page);
 
-static loff_t
-iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_folio_mkwrite_actor(struct inode *inode, loff_t pos,
+		loff_t length, void *data, struct iomap *iomap,
+		struct iomap *srcmap)
 {
-	struct page *page = data;
-	struct folio *folio = page_folio(page);
+	struct folio *folio = data;
 	int ret;
 
 	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
-		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		ret = __block_write_begin_int(&folio->page, pos, length, NULL,
+						iomap);
 		if (ret)
 			return ret;
-		block_commit_write(page, 0, length);
+		block_commit_write(&folio->page, 0, length);
 	} else {
-		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(!folio_test_uptodate(folio));
 		iomap_page_create(inode, folio);
-		set_page_dirty(page);
+		folio_mark_dirty(folio);
 	}
 
 	return length;
@@ -975,33 +975,33 @@  iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 
 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	unsigned long length;
-	loff_t offset;
+	size_t length;
+	loff_t pos;
 	ssize_t ret;
 
-	lock_page(page);
-	ret = page_mkwrite_check_truncate(page, inode);
+	folio_lock(folio);
+	ret = folio_mkwrite_check_truncate(folio, inode);
 	if (ret < 0)
 		goto out_unlock;
 	length = ret;
 
-	offset = page_offset(page);
+	pos = folio_pos(folio);
 	while (length > 0) {
-		ret = iomap_apply(inode, offset, length,
-				IOMAP_WRITE | IOMAP_FAULT, ops, page,
-				iomap_page_mkwrite_actor);
+		ret = iomap_apply(inode, pos, length,
+				IOMAP_WRITE | IOMAP_FAULT, ops, folio,
+				iomap_folio_mkwrite_actor);
 		if (unlikely(ret <= 0))
 			goto out_unlock;
-		offset += ret;
+		pos += ret;
 		length -= ret;
 	}
 
-	wait_for_stable_page(page);
+	folio_wait_stable(folio);
 	return VM_FAULT_LOCKED;
 out_unlock:
-	unlock_page(page);
+	folio_unlock(folio);
 	return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);