[v2,3/5] iomap: Support large pages
diff mbox series

Message ID 20190821003039.12555-4-willy@infradead.org
State New
Headers show
Series
  • iomap & xfs support for large pages
Related show

Commit Message

Matthew Wilcox Aug. 21, 2019, 12:30 a.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Change iomap_page from a statically sized uptodate bitmap to a dynamically
allocated uptodate bitmap, allowing an arbitrarily large page.

The only remaining places where iomap assumes an order-0 page are for
files with inline data, where there's no sense in allocating a larger
page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 119 ++++++++++++++++++++++++++---------------
 include/linux/iomap.h  |   2 +-
 include/linux/mm.h     |   2 +
 3 files changed, 80 insertions(+), 43 deletions(-)

Comments

Darrick J. Wong Sept. 18, 2019, 9:29 p.m. UTC | #1
On Tue, Aug 20, 2019 at 05:30:37PM -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> Change iomap_page from a statically sized uptodate bitmap to a dynamically
> allocated uptodate bitmap, allowing an arbitrarily large page.
> 
> The only remaining places where iomap assumes an order-0 page are for
> files with inline data, where there's no sense in allocating a larger
> page.

I wonder, will anything bad happen if that occurs?  (XFS doesn't have
inline data for files so I have no idea...)

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 119 ++++++++++++++++++++++++++---------------
>  include/linux/iomap.h  |   2 +-
>  include/linux/mm.h     |   2 +
>  3 files changed, 80 insertions(+), 43 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 0e76a4b6d98a..15d844a88439 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -23,14 +23,14 @@ static struct iomap_page *
>  iomap_page_create(struct inode *inode, struct page *page)
>  {
>  	struct iomap_page *iop = to_iomap_page(page);
> +	unsigned int n;
>  
>  	if (iop || i_blocks_per_page(inode, page) <= 1)
>  		return iop;
>  
> -	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
> -	atomic_set(&iop->read_count, 0);
> -	atomic_set(&iop->write_count, 0);
> -	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
> +	n = BITS_TO_LONGS(i_blocks_per_page(inode, page));
> +	iop = kmalloc(struct_size(iop, uptodate, n),
> +			GFP_NOFS | __GFP_NOFAIL | __GFP_ZERO);
>  
>  	/*
>  	 * migrate_page_move_mapping() assumes that pages with private data have
> @@ -61,15 +61,16 @@ iomap_page_release(struct page *page)
>   * Calculate the range inside the page that we actually need to read.
>   */
>  static void
> -iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
> +iomap_adjust_read_range(struct inode *inode, struct page *page,
>  		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
>  {
> +	struct iomap_page *iop = to_iomap_page(page);
>  	loff_t orig_pos = *pos;
>  	loff_t isize = i_size_read(inode);
>  	unsigned block_bits = inode->i_blkbits;
>  	unsigned block_size = (1 << block_bits);
> -	unsigned poff = offset_in_page(*pos);
> -	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
> +	unsigned poff = offset_in_this_page(page, *pos);
> +	unsigned plen = min_t(loff_t, page_size(page) - poff, length);
>  	unsigned first = poff >> block_bits;
>  	unsigned last = (poff + plen - 1) >> block_bits;
>  
> @@ -107,7 +108,8 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
>  	 * page cache for blocks that are entirely outside of i_size.
>  	 */
>  	if (orig_pos <= isize && orig_pos + length > isize) {
> -		unsigned end = offset_in_page(isize - 1) >> block_bits;
> +		unsigned end = offset_in_this_page(page, isize - 1) >>
> +				block_bits;
>  
>  		if (first <= end && last > end)
>  			plen -= (last - end) * block_size;
> @@ -121,19 +123,16 @@ static void
>  iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
>  {
>  	struct iomap_page *iop = to_iomap_page(page);
> -	struct inode *inode = page->mapping->host;
> -	unsigned first = off >> inode->i_blkbits;
> -	unsigned last = (off + len - 1) >> inode->i_blkbits;
> -	unsigned int i;
>  	bool uptodate = true;
>  
>  	if (iop) {
> -		for (i = 0; i < i_blocks_per_page(inode, page); i++) {
> -			if (i >= first && i <= last)
> -				set_bit(i, iop->uptodate);
> -			else if (!test_bit(i, iop->uptodate))
> -				uptodate = false;
> -		}
> +		struct inode *inode = page->mapping->host;
> +		unsigned first = off >> inode->i_blkbits;
> +		unsigned count = len >> inode->i_blkbits;
> +
> +		bitmap_set(iop->uptodate, first, count);
> +		if (!bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
> +			uptodate = false;
>  	}
>  
>  	if (uptodate && !PageError(page))
> @@ -194,6 +193,7 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>  		return;
>  
>  	BUG_ON(page->index);
> +	BUG_ON(PageCompound(page));
>  	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
>  
>  	addr = kmap_atomic(page);
> @@ -203,6 +203,16 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>  	SetPageUptodate(page);
>  }
>  
> +/*
> + * Estimate the number of vectors we need based on the current page size;
> + * if we're wrong we'll end up doing an overly large allocation or needing
> + * to do a second allocation, neither of which is a big deal.
> + */
> +static unsigned int iomap_nr_vecs(struct page *page, loff_t length)
> +{
> +	return (length + page_size(page) - 1) >> page_shift(page);
> +}
> +
>  static loff_t
>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct iomap *iomap)
> @@ -222,7 +232,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	}
>  
>  	/* zero post-eof blocks as the page may be mapped */
> -	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> +	iomap_adjust_read_range(inode, page, &pos, length, &poff, &plen);
>  	if (plen == 0)
>  		goto done;
>  
> @@ -258,7 +268,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  
>  	if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
>  		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
> -		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +		int nr_vecs = iomap_nr_vecs(page, length);
>  
>  		if (ctx->bio)
>  			submit_bio(ctx->bio);
> @@ -293,9 +303,9 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
>  	unsigned poff;
>  	loff_t ret;
>  
> -	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
> -		ret = iomap_apply(inode, page_offset(page) + poff,
> -				PAGE_SIZE - poff, 0, ops, &ctx,
> +	for (poff = 0; poff < page_size(page); poff += ret) {
> +		ret = iomap_apply(inode, file_offset_of_page(page) + poff,
> +				page_size(page) - poff, 0, ops, &ctx,
>  				iomap_readpage_actor);
>  		if (ret <= 0) {
>  			WARN_ON_ONCE(ret == 0);
> @@ -328,7 +338,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
>  	while (!list_empty(pages)) {
>  		struct page *page = lru_to_page(pages);
>  
> -		if (page_offset(page) >= (u64)pos + length)
> +		if (file_offset_of_page(page) >= (u64)pos + length)
>  			break;
>  
>  		list_del(&page->lru);
> @@ -342,7 +352,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
>  		 * readpages call itself as every page gets checked again once
>  		 * actually needed.
>  		 */
> -		*done += PAGE_SIZE;
> +		*done += page_size(page);
>  		put_page(page);
>  	}
>  
> @@ -355,9 +365,14 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
>  {
>  	struct iomap_readpage_ctx *ctx = data;
>  	loff_t done, ret;
> +	size_t left = 0;
> +
> +	if (ctx->cur_page)
> +		left = page_size(ctx->cur_page) -
> +					offset_in_this_page(ctx->cur_page, pos);
>  
>  	for (done = 0; done < length; done += ret) {
> -		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
> +		if (ctx->cur_page && left == 0) {
>  			if (!ctx->cur_page_in_bio)
>  				unlock_page(ctx->cur_page);
>  			put_page(ctx->cur_page);
> @@ -369,14 +384,27 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
>  			if (!ctx->cur_page)
>  				break;
>  			ctx->cur_page_in_bio = false;
> +			left = page_size(ctx->cur_page);
>  		}
>  		ret = iomap_readpage_actor(inode, pos + done, length - done,
>  				ctx, iomap);
> +		left -= ret;
>  	}
>  
>  	return done;
>  }
>  
> +/* move to fs.h? */
> +static inline struct page *readahead_first_page(struct list_head *head)
> +{
> +	return list_entry(head->prev, struct page, lru);
> +}
> +
> +static inline struct page *readahead_last_page(struct list_head *head)
> +{
> +	return list_entry(head->next, struct page, lru);
> +}
> +
>  int
>  iomap_readpages(struct address_space *mapping, struct list_head *pages,
>  		unsigned nr_pages, const struct iomap_ops *ops)
> @@ -385,9 +413,10 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages,
>  		.pages		= pages,
>  		.is_readahead	= true,
>  	};
> -	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> -	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> -	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> +	loff_t pos = file_offset_of_page(readahead_first_page(pages));
> +	loff_t end = file_offset_of_next_page(readahead_last_page(pages));
> +	loff_t length = end - pos;
> +	loff_t ret = 0;
>  
>  	while (length > 0) {
>  		ret = iomap_apply(mapping->host, pos, length, 0, ops,
> @@ -410,7 +439,7 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages,
>  	}
>  
>  	/*
> -	 * Check that we didn't lose a page due to the arcance calling
> +	 * Check that we didn't lose a page due to the arcane calling
>  	 * conventions..
>  	 */
>  	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
> @@ -435,7 +464,7 @@ iomap_is_partially_uptodate(struct page *page, unsigned long from,
>  	unsigned i;
>  
>  	/* Limit range to one page */
> -	len = min_t(unsigned, PAGE_SIZE - from, count);
> +	len = min_t(unsigned, page_size(page) - from, count);
>  
>  	/* First and last blocks in range within page */
>  	first = from >> inode->i_blkbits;
> @@ -474,7 +503,7 @@ iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
>  	 * If we are invalidating the entire page, clear the dirty state from it
>  	 * and release it to avoid unnecessary buildup of the LRU.
>  	 */
> -	if (offset == 0 && len == PAGE_SIZE) {
> +	if (offset == 0 && len == page_size(page)) {
>  		WARN_ON_ONCE(PageWriteback(page));
>  		cancel_dirty_page(page);
>  		iomap_page_release(page);
> @@ -550,18 +579,20 @@ static int
>  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
>  		struct page *page, struct iomap *iomap)
>  {
> -	struct iomap_page *iop = iomap_page_create(inode, page);
>  	loff_t block_size = i_blocksize(inode);
>  	loff_t block_start = pos & ~(block_size - 1);
>  	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
> -	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
> +	unsigned from = offset_in_this_page(page, pos);
> +	unsigned to = from + len;
> +	unsigned poff, plen;
>  	int status = 0;
>  
>  	if (PageUptodate(page))
>  		return 0;
> +	iomap_page_create(inode, page);
>  
>  	do {
> -		iomap_adjust_read_range(inode, iop, &block_start,
> +		iomap_adjust_read_range(inode, page, &block_start,
>  				block_end - block_start, &poff, &plen);
>  		if (plen == 0)
>  			break;
> @@ -673,7 +704,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>  	 */
>  	if (unlikely(copied < len && !PageUptodate(page)))
>  		return 0;
> -	iomap_set_range_uptodate(page, offset_in_page(pos), len);
> +	iomap_set_range_uptodate(page, offset_in_this_page(page, pos), len);
>  	iomap_set_page_dirty(page);
>  	return copied;
>  }
> @@ -685,6 +716,7 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
>  	void *addr;
>  
>  	WARN_ON_ONCE(!PageUptodate(page));
> +	BUG_ON(PageCompound(page));
>  	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
>  
>  	addr = kmap_atomic(page);
> @@ -749,6 +781,10 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		unsigned long bytes;	/* Bytes to write to page */
>  		size_t copied;		/* Bytes copied from user */
>  
> +		/*
> +		 * XXX: We don't know what size page we'll find in the
> +		 * page cache, so only copy up to a regular page boundary.

How might we fix this?

> +		 */
>  		offset = offset_in_page(pos);
>  		bytes = min_t(unsigned long, PAGE_SIZE - offset,
>  						iov_iter_count(i));
> @@ -1041,19 +1077,18 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
>  	lock_page(page);
>  	size = i_size_read(inode);
>  	if ((page->mapping != inode->i_mapping) ||
> -	    (page_offset(page) > size)) {
> +	    (file_offset_of_page(page) > size)) {
>  		/* We overload EFAULT to mean page got truncated */
>  		ret = -EFAULT;
>  		goto out_unlock;
>  	}
>  
> -	/* page is wholly or partially inside EOF */
> -	if (((page->index + 1) << PAGE_SHIFT) > size)
> -		length = offset_in_page(size);
> +	offset = file_offset_of_page(page);
> +	if (size - offset < page_size(page))
> +		length = offset_in_this_page(page, size);
>  	else
> -		length = PAGE_SIZE;
> +		length = page_size(page);
>  
> -	offset = page_offset(page);
>  	while (length > 0) {
>  		ret = iomap_apply(inode, offset, length,
>  				IOMAP_WRITE | IOMAP_FAULT, ops, page,
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index bc499ceae392..86be24a8259b 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -139,7 +139,7 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  struct iomap_page {
>  	atomic_t		read_count;
>  	atomic_t		write_count;
> -	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
> +	unsigned long		uptodate[];
>  };
>  
>  static inline struct iomap_page *to_iomap_page(struct page *page)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 726d7f046b49..6892cd712428 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1414,6 +1414,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
>  extern void pagefault_out_of_memory(void);
>  
>  #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
> +#define offset_in_this_page(page, p)	\
> +	((unsigned long)(p) & (page_size(page) - 1))

What's the difference between these two macros?  I guess the macro with
the longer name works for compound pages?  Whereas the first one only
works with order-0 pages?

--D

>  
>  /*
>   * Flags passed to show_mem() and show_free_areas() to suppress output in
> -- 
> 2.23.0.rc1
>

Patch
diff mbox series

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0e76a4b6d98a..15d844a88439 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -23,14 +23,14 @@  static struct iomap_page *
 iomap_page_create(struct inode *inode, struct page *page)
 {
 	struct iomap_page *iop = to_iomap_page(page);
+	unsigned int n;
 
 	if (iop || i_blocks_per_page(inode, page) <= 1)
 		return iop;
 
-	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
-	atomic_set(&iop->read_count, 0);
-	atomic_set(&iop->write_count, 0);
-	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+	n = BITS_TO_LONGS(i_blocks_per_page(inode, page));
+	iop = kmalloc(struct_size(iop, uptodate, n),
+			GFP_NOFS | __GFP_NOFAIL | __GFP_ZERO);
 
 	/*
 	 * migrate_page_move_mapping() assumes that pages with private data have
@@ -61,15 +61,16 @@  iomap_page_release(struct page *page)
  * Calculate the range inside the page that we actually need to read.
  */
 static void
-iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
+iomap_adjust_read_range(struct inode *inode, struct page *page,
 		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
 {
+	struct iomap_page *iop = to_iomap_page(page);
 	loff_t orig_pos = *pos;
 	loff_t isize = i_size_read(inode);
 	unsigned block_bits = inode->i_blkbits;
 	unsigned block_size = (1 << block_bits);
-	unsigned poff = offset_in_page(*pos);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	unsigned poff = offset_in_this_page(page, *pos);
+	unsigned plen = min_t(loff_t, page_size(page) - poff, length);
 	unsigned first = poff >> block_bits;
 	unsigned last = (poff + plen - 1) >> block_bits;
 
@@ -107,7 +108,8 @@  iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 	 * page cache for blocks that are entirely outside of i_size.
 	 */
 	if (orig_pos <= isize && orig_pos + length > isize) {
-		unsigned end = offset_in_page(isize - 1) >> block_bits;
+		unsigned end = offset_in_this_page(page, isize - 1) >>
+				block_bits;
 
 		if (first <= end && last > end)
 			plen -= (last - end) * block_size;
@@ -121,19 +123,16 @@  static void
 iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
 {
 	struct iomap_page *iop = to_iomap_page(page);
-	struct inode *inode = page->mapping->host;
-	unsigned first = off >> inode->i_blkbits;
-	unsigned last = (off + len - 1) >> inode->i_blkbits;
-	unsigned int i;
 	bool uptodate = true;
 
 	if (iop) {
-		for (i = 0; i < i_blocks_per_page(inode, page); i++) {
-			if (i >= first && i <= last)
-				set_bit(i, iop->uptodate);
-			else if (!test_bit(i, iop->uptodate))
-				uptodate = false;
-		}
+		struct inode *inode = page->mapping->host;
+		unsigned first = off >> inode->i_blkbits;
+		unsigned count = len >> inode->i_blkbits;
+
+		bitmap_set(iop->uptodate, first, count);
+		if (!bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
+			uptodate = false;
 	}
 
 	if (uptodate && !PageError(page))
@@ -194,6 +193,7 @@  iomap_read_inline_data(struct inode *inode, struct page *page,
 		return;
 
 	BUG_ON(page->index);
+	BUG_ON(PageCompound(page));
 	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
 
 	addr = kmap_atomic(page);
@@ -203,6 +203,16 @@  iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 
+/*
+ * Estimate the number of vectors we need based on the current page size;
+ * if we're wrong we'll end up doing an overly large allocation or needing
+ * to do a second allocation, neither of which is a big deal.
+ */
+static unsigned int iomap_nr_vecs(struct page *page, loff_t length)
+{
+	return (length + page_size(page) - 1) >> page_shift(page);
+}
+
 static loff_t
 iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct iomap *iomap)
@@ -222,7 +232,7 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	}
 
 	/* zero post-eof blocks as the page may be mapped */
-	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	iomap_adjust_read_range(inode, page, &pos, length, &poff, &plen);
 	if (plen == 0)
 		goto done;
 
@@ -258,7 +268,7 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 	if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
-		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		int nr_vecs = iomap_nr_vecs(page, length);
 
 		if (ctx->bio)
 			submit_bio(ctx->bio);
@@ -293,9 +303,9 @@  iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 
-	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
-		ret = iomap_apply(inode, page_offset(page) + poff,
-				PAGE_SIZE - poff, 0, ops, &ctx,
+	for (poff = 0; poff < page_size(page); poff += ret) {
+		ret = iomap_apply(inode, file_offset_of_page(page) + poff,
+				page_size(page) - poff, 0, ops, &ctx,
 				iomap_readpage_actor);
 		if (ret <= 0) {
 			WARN_ON_ONCE(ret == 0);
@@ -328,7 +338,7 @@  iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
 	while (!list_empty(pages)) {
 		struct page *page = lru_to_page(pages);
 
-		if (page_offset(page) >= (u64)pos + length)
+		if (file_offset_of_page(page) >= (u64)pos + length)
 			break;
 
 		list_del(&page->lru);
@@ -342,7 +352,7 @@  iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
 		 * readpages call itself as every page gets checked again once
 		 * actually needed.
 		 */
-		*done += PAGE_SIZE;
+		*done += page_size(page);
 		put_page(page);
 	}
 
@@ -355,9 +365,14 @@  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct iomap_readpage_ctx *ctx = data;
 	loff_t done, ret;
+	size_t left = 0;
+
+	if (ctx->cur_page)
+		left = page_size(ctx->cur_page) -
+					offset_in_this_page(ctx->cur_page, pos);
 
 	for (done = 0; done < length; done += ret) {
-		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
+		if (ctx->cur_page && left == 0) {
 			if (!ctx->cur_page_in_bio)
 				unlock_page(ctx->cur_page);
 			put_page(ctx->cur_page);
@@ -369,14 +384,27 @@  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 			if (!ctx->cur_page)
 				break;
 			ctx->cur_page_in_bio = false;
+			left = page_size(ctx->cur_page);
 		}
 		ret = iomap_readpage_actor(inode, pos + done, length - done,
 				ctx, iomap);
+		left -= ret;
 	}
 
 	return done;
 }
 
+/* move to fs.h? */
+static inline struct page *readahead_first_page(struct list_head *head)
+{
+	return list_entry(head->prev, struct page, lru);
+}
+
+static inline struct page *readahead_last_page(struct list_head *head)
+{
+	return list_entry(head->next, struct page, lru);
+}
+
 int
 iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops)
@@ -385,9 +413,10 @@  iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		.pages		= pages,
 		.is_readahead	= true,
 	};
-	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
-	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
-	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+	loff_t pos = file_offset_of_page(readahead_first_page(pages));
+	loff_t end = file_offset_of_next_page(readahead_last_page(pages));
+	loff_t length = end - pos;
+	loff_t ret = 0;
 
 	while (length > 0) {
 		ret = iomap_apply(mapping->host, pos, length, 0, ops,
@@ -410,7 +439,7 @@  iomap_readpages(struct address_space *mapping, struct list_head *pages,
 	}
 
 	/*
-	 * Check that we didn't lose a page due to the arcance calling
+	 * Check that we didn't lose a page due to the arcane calling
 	 * conventions..
 	 */
 	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
@@ -435,7 +464,7 @@  iomap_is_partially_uptodate(struct page *page, unsigned long from,
 	unsigned i;
 
 	/* Limit range to one page */
-	len = min_t(unsigned, PAGE_SIZE - from, count);
+	len = min_t(unsigned, page_size(page) - from, count);
 
 	/* First and last blocks in range within page */
 	first = from >> inode->i_blkbits;
@@ -474,7 +503,7 @@  iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
 	 * If we are invalidating the entire page, clear the dirty state from it
 	 * and release it to avoid unnecessary buildup of the LRU.
 	 */
-	if (offset == 0 && len == PAGE_SIZE) {
+	if (offset == 0 && len == page_size(page)) {
 		WARN_ON_ONCE(PageWriteback(page));
 		cancel_dirty_page(page);
 		iomap_page_release(page);
@@ -550,18 +579,20 @@  static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		struct page *page, struct iomap *iomap)
 {
-	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
+	unsigned from = offset_in_this_page(page, pos);
+	unsigned to = from + len;
+	unsigned poff, plen;
 	int status = 0;
 
 	if (PageUptodate(page))
 		return 0;
+	iomap_page_create(inode, page);
 
 	do {
-		iomap_adjust_read_range(inode, iop, &block_start,
+		iomap_adjust_read_range(inode, page, &block_start,
 				block_end - block_start, &poff, &plen);
 		if (plen == 0)
 			break;
@@ -673,7 +704,7 @@  __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	 */
 	if (unlikely(copied < len && !PageUptodate(page)))
 		return 0;
-	iomap_set_range_uptodate(page, offset_in_page(pos), len);
+	iomap_set_range_uptodate(page, offset_in_this_page(page, pos), len);
 	iomap_set_page_dirty(page);
 	return copied;
 }
@@ -685,6 +716,7 @@  iomap_write_end_inline(struct inode *inode, struct page *page,
 	void *addr;
 
 	WARN_ON_ONCE(!PageUptodate(page));
+	BUG_ON(PageCompound(page));
 	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
 
 	addr = kmap_atomic(page);
@@ -749,6 +781,10 @@  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
 
+		/*
+		 * XXX: We don't know what size page we'll find in the
+		 * page cache, so only copy up to a regular page boundary.
+		 */
 		offset = offset_in_page(pos);
 		bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_count(i));
@@ -1041,19 +1077,18 @@  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
-	    (page_offset(page) > size)) {
+	    (file_offset_of_page(page) > size)) {
 		/* We overload EFAULT to mean page got truncated */
 		ret = -EFAULT;
 		goto out_unlock;
 	}
 
-	/* page is wholly or partially inside EOF */
-	if (((page->index + 1) << PAGE_SHIFT) > size)
-		length = offset_in_page(size);
+	offset = file_offset_of_page(page);
+	if (size - offset < page_size(page))
+		length = offset_in_this_page(page, size);
 	else
-		length = PAGE_SIZE;
+		length = page_size(page);
 
-	offset = page_offset(page);
 	while (length > 0) {
 		ret = iomap_apply(inode, offset, length,
 				IOMAP_WRITE | IOMAP_FAULT, ops, page,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index bc499ceae392..86be24a8259b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -139,7 +139,7 @@  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 struct iomap_page {
 	atomic_t		read_count;
 	atomic_t		write_count;
-	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+	unsigned long		uptodate[];
 };
 
 static inline struct iomap_page *to_iomap_page(struct page *page)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 726d7f046b49..6892cd712428 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1414,6 +1414,8 @@  static inline void clear_page_pfmemalloc(struct page *page)
 extern void pagefault_out_of_memory(void);
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
+#define offset_in_this_page(page, p)	\
+	((unsigned long)(p) & (page_size(page) - 1))
 
 /*
  * Flags passed to show_mem() and show_free_areas() to suppress output in