
[RFC] filemap: Convert generic_perform_write() to support large folios

Message ID 20230822200937.159934-1-willy@infradead.org (mailing list archive)
State New, archived
Series [RFC] filemap: Convert generic_perform_write() to support large folios

Commit Message

Matthew Wilcox Aug. 22, 2023, 8:09 p.m. UTC
Modelled after the loop in iomap_write_iter(), copy larger chunks from
userspace if the filesystem has created large folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
This patch depends on patches currently in the iomap tree.  Sending it
out now for feedback, but I'll resend it after rc1.

 mm/filemap.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)
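
A minimal userspace simulation of the new sizing logic may help in
review.  This is a sketch, not kernel code: PAGE_SIZE and
MAX_PAGECACHE_ORDER are assumed values (order 9, i.e. 2MiB chunks with
4KiB pages, as on a THP-enabled x86-64 config), and the bitmask stands
in for offset_in_folio() on a naturally aligned folio.

/* Sketch of the chunk sizing in the patched generic_perform_write(). */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define MAX_PAGECACHE_ORDER	9	/* assumed; config-dependent */

int main(void)
{
	unsigned long chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
	unsigned long pos = 3 * PAGE_SIZE + 100;	/* example file position */
	unsigned long count = 5UL << 20;	/* bytes left in the iov_iter */
	unsigned long fsize = 64 * PAGE_SIZE;	/* folio ->write_begin() returned */

	/* Optimistic sizing before we know which folio we will get: */
	unsigned long offset = pos & (chunk - 1);
	unsigned long bytes = chunk - offset < count ? chunk - offset : count;

	/* After ->write_begin(), clamp to the folio actually returned
	 * (the mask mimics offset_in_folio() for an aligned folio): */
	unsigned long fofs = pos & (fsize - 1);
	if (bytes > fsize - fofs)
		bytes = fsize - fofs;

	printf("copy %lu bytes at folio offset %lu\n", bytes, fofs);
	return 0;
}

With these example numbers the copy is clamped from ~2MiB down to the
249756 bytes remaining in the 256KiB folio, which is exactly the
clamping the patch adds after write_begin().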

Comments

Darrick J. Wong Aug. 22, 2023, 9:17 p.m. UTC | #1
On Tue, Aug 22, 2023 at 09:09:37PM +0100, Matthew Wilcox (Oracle) wrote:
> Modelled after the loop in iomap_write_iter(), copy larger chunks from
> userspace if the filesystem has created large folios.

Hum.  Which filesystems are those?  Is this for the in-memory ones like
tmpfs?

--D

Matthew Wilcox Aug. 22, 2023, 9:56 p.m. UTC | #2
On Tue, Aug 22, 2023 at 02:17:20PM -0700, Darrick J. Wong wrote:
> On Tue, Aug 22, 2023 at 09:09:37PM +0100, Matthew Wilcox (Oracle) wrote:
> > Modelled after the loop in iomap_write_iter(), copy larger chunks from
> > userspace if the filesystem has created large folios.
> 
> Hum.  Which filesystems are those?  Is this for the in-memory ones like
> tmpfs?

Alas, tmpfs uses its own shmem_file_read_iter() and doesn't call back
into generic_perform_write().  But I was looking at the ramfs aops and
thinking those looked ripe for large folio support, so I thought I'd take
care of this part first since it potentially affects every filesystem
that uses generic_file_write_iter() / __generic_file_write_iter() /
generic_perform_write().

This is also a great opportunity for someone to tell me "Actually I have
plans in this area and ..."
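
For reference, the entry points named above are what a filesystem wires
up in order to take this path at all.  A hypothetical sketch follows
(the myfs_ names are illustrative, not from any tree); the
mapping_set_large_folios() call is the opt-in without which
->write_begin() will keep handing back order-0 folios:

/* Hypothetical glue showing how writes reach generic_perform_write(). */
#include <linux/fs.h>
#include <linux/pagemap.h>

static const struct file_operations myfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter, /* -> __generic_file_write_iter()
						    * -> generic_perform_write() */
	.mmap		= generic_file_mmap,
};

static void myfs_init_inode(struct inode *inode)
{
	inode->i_fop = &myfs_file_operations;
	/* Opt the mapping into large folios so the bigger chunks can
	 * actually materialise; the rest of the filesystem must be
	 * prepared to see multi-page folios too. */
	mapping_set_large_folios(inode->i_mapping);
}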
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index bf6219d9aaac..fd28767c760a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3908,6 +3908,7 @@ EXPORT_SYMBOL(generic_file_direct_write);
 ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 {
 	struct file *file = iocb->ki_filp;
+	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iocb->ki_pos;
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
@@ -3916,16 +3917,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 
 	do {
 		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		struct folio *folio;
+		size_t offset;		/* Offset into folio */
+		size_t bytes;		/* Bytes to write to folio */
 		size_t copied;		/* Bytes copied from user */
 		void *fsdata = NULL;
 
-		offset = (pos & (PAGE_SIZE - 1));
-		bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_count(i));
+		offset = pos & (chunk - 1);
+		bytes = min(chunk - offset, iov_iter_count(i));
+		balance_dirty_pages_ratelimited(mapping);
 
-again:
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -3947,11 +3948,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 		if (unlikely(status < 0))
 			break;
 
+		folio = page_folio(page);
+		offset = offset_in_folio(folio, pos);
+		if (bytes > folio_size(folio) - offset)
+			bytes = folio_size(folio) - offset;
+
 		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
+			flush_dcache_folio(folio);
 
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
-		flush_dcache_page(page);
+		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+		flush_dcache_folio(folio);
 
 		status = a_ops->write_end(file, mapping, pos, bytes, copied,
 						page, fsdata);
@@ -3971,12 +3977,12 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
 			 */
 			if (copied)
 				bytes = copied;
-			goto again;
+			if (chunk > PAGE_SIZE)
+				chunk /= 2;
+		} else {
+			pos += status;
+			written += status;
 		}
-		pos += status;
-		written += status;
-
-		balance_dirty_pages_ratelimited(mapping);
 	} while (iov_iter_count(i));
 
 	if (!written)
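
One behavioural change worth calling out when comparing against the old
"goto again" retry: on a short copy (typically a fault while bringing in
the source pages), the loop no longer retries at the same size but
halves the chunk, bottoming out at PAGE_SIZE.  A plain C sketch of that
back-off, under the assumption that every attempt copies nothing and a
starting order of 9 (again an assumed, config-dependent value):

/* Back-off on repeated short copies, per the patched loop:
 * the chunk halves until it reaches PAGE_SIZE, then stays there. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long chunk = PAGE_SIZE << 9;	/* assumed MAX_PAGECACHE_ORDER */

	for (int attempt = 1; attempt <= 12; attempt++) {
		printf("attempt %2d: try up to %7lu bytes\n", attempt, chunk);
		if (chunk > PAGE_SIZE)	/* the fallback path in the patch */
			chunk /= 2;
	}
	return 0;
}

After ten failed attempts the chunk has decayed from 2MiB to PAGE_SIZE
and stays there, matching the behaviour of the unconverted code.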