diff mbox series

[09/14] iomap: Change iomap_write_begin calling convention

Message ID 20201014030357.21898-10-willy@infradead.org (mailing list archive)
State Deferred, archived
Headers show
Series Transparent Huge Page support for XFS | expand

Commit Message

Matthew Wilcox Oct. 14, 2020, 3:03 a.m. UTC
Pass (up to) the remaining length of the extent to iomap_write_begin()
and have it return the number of bytes that will fit in the page.
That lets us copy more bytes per call to iomap_write_begin() if the page
cache has already allocated a THP (and will in future allow us to pass
a hint to the page cache that it should try to allocate a larger page
if there are none in the cache).

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 61 +++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

Comments

Darrick J. Wong Oct. 14, 2020, 4:47 p.m. UTC | #1
On Wed, Oct 14, 2020 at 04:03:52AM +0100, Matthew Wilcox (Oracle) wrote:
> Pass (up to) the remaining length of the extent to iomap_write_begin()
> and have it return the number of bytes that will fit in the page.
> That lets us copy more bytes per call to iomap_write_begin() if the page
> cache has already allocated a THP (and will in future allow us to pass
> a hint to the page cache that it should try to allocate a larger page
> if there are none in the cache).
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 61 +++++++++++++++++++++++-------------------
>  1 file changed, 33 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 4ef02afaedc5..397795db3ce5 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -616,14 +616,14 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
>  	return submit_bio_wait(&bio);
>  }
>  
> -static int
> -__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> -		struct page *page, struct iomap *srcmap)
> +static ssize_t __iomap_write_begin(struct inode *inode, loff_t pos,
> +		size_t len, int flags, struct page *page, struct iomap *srcmap)
>  {
>  	loff_t block_size = i_blocksize(inode);
>  	loff_t block_start = pos & ~(block_size - 1);
>  	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
> -	unsigned from = offset_in_page(pos), to = from + len;
> +	size_t from = offset_in_thp(page, pos);
> +	size_t to = from + len;
>  	size_t poff, plen;
>  
>  	if (PageUptodate(page))
> @@ -658,12 +658,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  	return 0;
>  }
>  
> -static int
> -iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> -		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> +static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, loff_t len,
> +		unsigned flags, struct page **pagep, struct iomap *iomap,

loff_t len?  You've been using size_t (ssize_t?) for length elsewhere,
can't return more than ssize_t, and afaik MAX_RW_COUNT will never go
larger than 2GB so I'm confused about types here...?

Mostly because my brain has been trained to think that if it sees
"size_t len" as an input parameter and a ssize_t return value, then
probably the return value is however much of @len we managed to process.

> +		struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	struct page *page;
> +	size_t offset;
>  	int status = 0;
>  
>  	BUG_ON(pos + len > iomap->offset + iomap->length);
> @@ -674,6 +675,8 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		return -EINTR;
>  
>  	if (page_ops && page_ops->page_prepare) {
> +		if (len > UINT_MAX)
> +			len = UINT_MAX;

I'm not especially familiar with page_prepare (since it's a gfs2 thing);
why do you clamp len to UINT_MAX here?

--D

>  		status = page_ops->page_prepare(inode, pos, len, iomap);
>  		if (status)
>  			return status;
> @@ -685,6 +688,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		status = -ENOMEM;
>  		goto out_no_page;
>  	}
> +	page = thp_head(page);
> +	offset = offset_in_thp(page, pos);
> +	if (len > thp_size(page) - offset)
> +		len = thp_size(page) - offset;
>  
>  	if (srcmap->type == IOMAP_INLINE)
>  		iomap_read_inline_data(inode, page, srcmap);
> @@ -694,11 +701,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
>  				srcmap);
>  
> -	if (unlikely(status))
> +	if (status < 0)
>  		goto out_unlock;
>  
>  	*pagep = page;
> -	return 0;
> +	return len;
>  
>  out_unlock:
>  	unlock_page(page);
> @@ -854,8 +861,10 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  
>  		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
>  				srcmap);
> -		if (unlikely(status))
> +		if (status < 0)
>  			break;
> +		/* We may be partway through a THP */
> +		offset = offset_in_thp(page, pos);
>  
>  		if (mapping_writably_mapped(inode->i_mapping))
>  			flush_dcache_page(page);
> @@ -915,7 +924,6 @@ static loff_t
>  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct iomap *iomap, struct iomap *srcmap)
>  {
> -	long status = 0;
>  	loff_t written = 0;
>  
>  	/* don't bother with blocks that are not shared to start with */
> @@ -926,25 +934,24 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		return length;
>  
>  	do {
> -		unsigned long offset = offset_in_page(pos);
> -		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
>  		struct page *page;
> +		ssize_t bytes;
>  
> -		status = iomap_write_begin(inode, pos, bytes,
> +		bytes = iomap_write_begin(inode, pos, length,
>  				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
> -		if (unlikely(status))
> -			return status;
> +		if (bytes < 0)
> +			return bytes;
>  
> -		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
> +		bytes = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
>  				srcmap);
> -		if (WARN_ON_ONCE(status == 0))
> +		if (WARN_ON_ONCE(bytes == 0))
>  			return -EIO;
>  
>  		cond_resched();
>  
> -		pos += status;
> -		written += status;
> -		length -= status;
> +		pos += bytes;
> +		written += bytes;
> +		length -= bytes;
>  
>  		balance_dirty_pages_ratelimited(inode->i_mapping);
>  	} while (length);
> @@ -975,15 +982,13 @@ static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
>  		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page;
> -	int status;
> -	unsigned offset = offset_in_page(pos);
> -	unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
> +	ssize_t bytes;
>  
> -	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
> -	if (status)
> -		return status;
> +	bytes = iomap_write_begin(inode, pos, length, 0, &page, iomap, srcmap);
> +	if (bytes < 0)
> +		return bytes;
>  
> -	zero_user(page, offset, bytes);
> +	zero_user(page, offset_in_thp(page, pos), bytes);
>  	mark_page_accessed(page);
>  
>  	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
> -- 
> 2.28.0
>
Matthew Wilcox Oct. 14, 2020, 5:41 p.m. UTC | #2
On Wed, Oct 14, 2020 at 09:47:44AM -0700, Darrick J. Wong wrote:
> > -static int
> > -iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> > -		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> > +static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, loff_t len,
> > +		unsigned flags, struct page **pagep, struct iomap *iomap,
> 
> loff_t len?  You've been using size_t (ssize_t?) for length elsewhere,
> can't return more than ssize_t, and afaik MAX_RW_COUNT will never go
> larger than 2GB so I'm confused about types here...?

Yes, you're right.  This one should be size_t.

> >  	if (page_ops && page_ops->page_prepare) {
> > +		if (len > UINT_MAX)
> > +			len = UINT_MAX;
> 
> I'm not especially familiar with page_prepare (since it's a gfs2 thing);
> why do you clamp len to UINT_MAX here?

The len parameter of ->page_prepare is an unsigned int.  I don't want
a 1<<32+1 byte I/O to be seen as a 1 byte I/O.  We could upgrade the
parameter to size_t from unsigned int?
Matthew Wilcox Oct. 14, 2020, 6:08 p.m. UTC | #3
On Wed, Oct 14, 2020 at 06:41:58PM +0100, Matthew Wilcox wrote:
> On Wed, Oct 14, 2020 at 09:47:44AM -0700, Darrick J. Wong wrote:
> > > -static int
> > > -iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> > > -		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> > > +static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, loff_t len,
> > > +		unsigned flags, struct page **pagep, struct iomap *iomap,
> > 
> > loff_t len?  You've been using size_t (ssize_t?) for length elsewhere,
> > can't return more than ssize_t, and afaik MAX_RW_COUNT will never go
> > larger than 2GB so I'm confused about types here...?
> 
> Yes, you're right.  This one should be size_t.
> 
> > >  	if (page_ops && page_ops->page_prepare) {
> > > +		if (len > UINT_MAX)
> > > +			len = UINT_MAX;
> > 
> > I'm not especially familiar with page_prepare (since it's a gfs2 thing);
> > why do you clamp len to UINT_MAX here?
> 
> The len parameter of ->page_prepare is an unsigned int.  I don't want
> a 1<<32+1 byte I/O to be seen as a 1 byte I/O.  We could upgrade the
> parameter to size_t from unsigned int?

OK, here's what I have -- two patches (copy-and-wasted):

    iomap: Prepare page ops for larger I/Os
    
    Just adjust the types for the page_prepare() and page_done() callbacks
    to size_t from unsigned int.  While I don't see individual pages that
    large in our near future, it's convenient to be able to pass in larger
    lengths and has no cost.
    
    Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0f69fbd4af66..9a94d7e58199 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1028,7 +1028,7 @@ static void gfs2_write_unlock(struct inode *inode)
 }
 
 static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
-                                  unsigned len, struct iomap *iomap)
+                                  size_t len, struct iomap *iomap)
 {
        unsigned int blockmask = i_blocksize(inode) - 1;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1039,7 +1039,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
 }
 
 static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
-                                unsigned copied, struct page *page,
+                                size_t copied, struct page *page,
                                 struct iomap *iomap)
 {
        struct gfs2_trans *tr = current->journal_info;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 4d1d3c3469e9..d3b06bffd996 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -106,9 +106,9 @@ iomap_sector(struct iomap *iomap, loff_t pos)
  * associated page could not be obtained.
  */
 struct iomap_page_ops {
-       int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len,
+       int (*page_prepare)(struct inode *inode, loff_t pos, size_t len,
                        struct iomap *iomap);
-       void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+       void (*page_done)(struct inode *inode, loff_t pos, size_t copied,
                        struct page *page, struct iomap *iomap);
 };
 


The other is an amendment of this:

-static int
-iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags
,
-               struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
+static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, size_t len,
+               unsigned flags, struct page **pagep, struct iomap *iomap,
+               struct iomap *srcmap)

and I drop the clamp to UINT_MAX.  I also added:

-               unsigned long offset;   /* Offset into pagecache page */
-               unsigned long bytes;    /* Bytes to write to page */
+               size_t offset;          /* Offset into pagecache page */
+               size_t bytes;           /* Bytes to write to page */

which won't change anything, but does fit the general pattern of
using size_t for a byte count of in-memory things.  It matches
iomap_unshare_actor(), for example.
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4ef02afaedc5..397795db3ce5 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -616,14 +616,14 @@  iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
 	return submit_bio_wait(&bio);
 }
 
-static int
-__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
-		struct page *page, struct iomap *srcmap)
+static ssize_t __iomap_write_begin(struct inode *inode, loff_t pos,
+		size_t len, int flags, struct page *page, struct iomap *srcmap)
 {
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned from = offset_in_page(pos), to = from + len;
+	size_t from = offset_in_thp(page, pos);
+	size_t to = from + len;
 	size_t poff, plen;
 
 	if (PageUptodate(page))
@@ -658,12 +658,13 @@  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
-static int
-iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
+static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, loff_t len,
+		unsigned flags, struct page **pagep, struct iomap *iomap,
+		struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	struct page *page;
+	size_t offset;
 	int status = 0;
 
 	BUG_ON(pos + len > iomap->offset + iomap->length);
@@ -674,6 +675,8 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		return -EINTR;
 
 	if (page_ops && page_ops->page_prepare) {
+		if (len > UINT_MAX)
+			len = UINT_MAX;
 		status = page_ops->page_prepare(inode, pos, len, iomap);
 		if (status)
 			return status;
@@ -685,6 +688,10 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		status = -ENOMEM;
 		goto out_no_page;
 	}
+	page = thp_head(page);
+	offset = offset_in_thp(page, pos);
+	if (len > thp_size(page) - offset)
+		len = thp_size(page) - offset;
 
 	if (srcmap->type == IOMAP_INLINE)
 		iomap_read_inline_data(inode, page, srcmap);
@@ -694,11 +701,11 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		status = __iomap_write_begin(inode, pos, len, flags, page,
 				srcmap);
 
-	if (unlikely(status))
+	if (status < 0)
 		goto out_unlock;
 
 	*pagep = page;
-	return 0;
+	return len;
 
 out_unlock:
 	unlock_page(page);
@@ -854,8 +861,10 @@  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
 				srcmap);
-		if (unlikely(status))
+		if (status < 0)
 			break;
+		/* We may be partway through a THP */
+		offset = offset_in_thp(page, pos);
 
 		if (mapping_writably_mapped(inode->i_mapping))
 			flush_dcache_page(page);
@@ -915,7 +924,6 @@  static loff_t
 iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct iomap *iomap, struct iomap *srcmap)
 {
-	long status = 0;
 	loff_t written = 0;
 
 	/* don't bother with blocks that are not shared to start with */
@@ -926,25 +934,24 @@  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return length;
 
 	do {
-		unsigned long offset = offset_in_page(pos);
-		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
 		struct page *page;
+		ssize_t bytes;
 
-		status = iomap_write_begin(inode, pos, bytes,
+		bytes = iomap_write_begin(inode, pos, length,
 				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
-		if (unlikely(status))
-			return status;
+		if (bytes < 0)
+			return bytes;
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
+		bytes = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
 				srcmap);
-		if (WARN_ON_ONCE(status == 0))
+		if (WARN_ON_ONCE(bytes == 0))
 			return -EIO;
 
 		cond_resched();
 
-		pos += status;
-		written += status;
-		length -= status;
+		pos += bytes;
+		written += bytes;
+		length -= bytes;
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 	} while (length);
@@ -975,15 +982,13 @@  static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
 		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page;
-	int status;
-	unsigned offset = offset_in_page(pos);
-	unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
+	ssize_t bytes;
 
-	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
-	if (status)
-		return status;
+	bytes = iomap_write_begin(inode, pos, length, 0, &page, iomap, srcmap);
+	if (bytes < 0)
+		return bytes;
 
-	zero_user(page, offset, bytes);
+	zero_user(page, offset_in_thp(page, pos), bytes);
 	mark_page_accessed(page);
 
 	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);