
[5/9] iomap: Support arbitrarily many blocks per page

Message ID 20200824145511.10500-6-willy@infradead.org (mailing list archive)
State New, archived
Series: THP iomap patches for 5.10

Commit Message

Matthew Wilcox Aug. 24, 2020, 2:55 p.m. UTC
Size the uptodate array dynamically to support larger pages in the
page cache.  With a 64kB page, we're only saving 8 bytes per page today,
but with a 2MB maximum page size, we'd have to allocate more than 4kB
per page.  Add a few debugging assertions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
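
A quick worked example of the sizing arithmetic the commit message describes
(illustrative only -- the 2MB THP and 4kB block size are assumed example
values; the actual allocation is the kzalloc() in iomap_page_create() below):

	/* i_blocks_per_page() == page size / filesystem block size */
	unsigned int nr_blocks = (2 * 1024 * 1024) / 4096;	/* 512 blocks */
	/* BITS_TO_LONGS(512) == 8 on a 64-bit kernel */
	size_t bytes = struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks));
	/* == sizeof(struct iomap_page) + 8 * sizeof(unsigned long) */

So the flexible uptodate[] array costs 64 bytes for a 2MB THP here, rather
than carrying a bitmap statically sized for the largest supported page.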

Comments

Dave Chinner Aug. 24, 2020, 11:59 p.m. UTC | #1
On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote:
> Size the uptodate array dynamically to support larger pages in the
> page cache.  With a 64kB page, we're only saving 8 bytes per page today,
> but with a 2MB maximum page size, we'd have to allocate more than 4kB
> per page.  Add a few debugging assertions.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index dbf9572dabe9..844e95cacea8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -22,18 +22,19 @@
>  #include "../internal.h"
>  
>  /*
> - * Structure allocated for each page when block size < PAGE_SIZE to track
> + * Structure allocated for each page when block size < page size to track
>   * sub-page uptodate status and I/O completions.
>   */
>  struct iomap_page {
>  	atomic_t		read_count;
>  	atomic_t		write_count;
>  	spinlock_t		uptodate_lock;
> -	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
> +	unsigned long		uptodate[];
>  };
>  
>  static inline struct iomap_page *to_iomap_page(struct page *page)
>  {
> +	VM_BUG_ON_PGFLAGS(PageTail(page), page);
>  	if (page_has_private(page))
>  		return (struct iomap_page *)page_private(page);
>  	return NULL;

Just to confirm: this vm bug check is needed because we only
attach the iomap_page to the head page of a compound page?

Assuming that I've understood the above correctly:

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Matthew Wilcox Aug. 25, 2020, 12:22 a.m. UTC | #2
On Tue, Aug 25, 2020 at 09:59:18AM +1000, Dave Chinner wrote:
> On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote:
> >  static inline struct iomap_page *to_iomap_page(struct page *page)
> >  {
> > +	VM_BUG_ON_PGFLAGS(PageTail(page), page);
> >  	if (page_has_private(page))
> >  		return (struct iomap_page *)page_private(page);
> >  	return NULL;
> 
> Just to confirm: this vm bug check is needed because we only
> attach the iomap_page to the head page of a compound page?
> 
> Assuming that I've understood the above correctly:

That's correct.  If we get a tail page in this path, something's gone
wrong somewhere upstream of us, and the stack trace should tell us where.
It's PGFLAGS so it's usually compiled out by distro kernels (you need to
enable CONFIG_DEBUG_VM_PGFLAGS for it to be active).

It was definitely useful in development; probably not so useful for
a distro kernel to assert.
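
For reference, the macro lives in include/linux/mmdebug.h and is defined
along these lines (paraphrased):

	#ifdef CONFIG_DEBUG_VM_PGFLAGS
	/* dumps the page and BUG()s if cond is true */
	#define VM_BUG_ON_PGFLAGS(cond, page)	VM_BUG_ON_PAGE(cond, page)
	#else
	/* only type-checks cond; generates no runtime code */
	#define VM_BUG_ON_PGFLAGS(cond, page)	BUILD_BUG_ON_INVALID(cond)
	#endif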
Darrick J. Wong Aug. 25, 2020, 9:02 p.m. UTC | #3
On Mon, Aug 24, 2020 at 03:55:06PM +0100, Matthew Wilcox (Oracle) wrote:
> Size the uptodate array dynamically to support larger pages in the
> page cache.  With a 64kB page, we're only saving 8 bytes per page today,
> but with a 2MB maximum page size, we'd have to allocate more than 4kB
> per page.  Add a few debugging assertions.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index dbf9572dabe9..844e95cacea8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -22,18 +22,19 @@
>  #include "../internal.h"
>  
>  /*
> - * Structure allocated for each page when block size < PAGE_SIZE to track
> + * Structure allocated for each page when block size < page size to track
>   * sub-page uptodate status and I/O completions.

"for each regular page or head page of a huge page"?  Or whatever we're
calling them nowadays?

--D

>   */
>  struct iomap_page {
>  	atomic_t		read_count;
>  	atomic_t		write_count;
>  	spinlock_t		uptodate_lock;
> -	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
> +	unsigned long		uptodate[];
>  };
>  
>  static inline struct iomap_page *to_iomap_page(struct page *page)
>  {
> +	VM_BUG_ON_PGFLAGS(PageTail(page), page);
>  	if (page_has_private(page))
>  		return (struct iomap_page *)page_private(page);
>  	return NULL;
> @@ -45,11 +46,13 @@ static struct iomap_page *
>  iomap_page_create(struct inode *inode, struct page *page)
>  {
>  	struct iomap_page *iop = to_iomap_page(page);
> +	unsigned int nr_blocks = i_blocks_per_page(inode, page);
>  
> -	if (iop || i_blocks_per_page(inode, page) <= 1)
> +	if (iop || nr_blocks <= 1)
>  		return iop;
>  
> -	iop = kzalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
> +	iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
> +			GFP_NOFS | __GFP_NOFAIL);
>  	spin_lock_init(&iop->uptodate_lock);
>  	attach_page_private(page, iop);
>  	return iop;
> @@ -59,11 +62,14 @@ static void
>  iomap_page_release(struct page *page)
>  {
>  	struct iomap_page *iop = detach_page_private(page);
> +	unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
>  
>  	if (!iop)
>  		return;
>  	WARN_ON_ONCE(atomic_read(&iop->read_count));
>  	WARN_ON_ONCE(atomic_read(&iop->write_count));
> +	WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
> +			PageUptodate(page));
>  	kfree(iop);
>  }
>  
> -- 
> 2.28.0
>
Matthew Wilcox Aug. 26, 2020, 2:26 a.m. UTC | #4
On Tue, Aug 25, 2020 at 02:02:03PM -0700, Darrick J. Wong wrote:
> >  /*
> > - * Structure allocated for each page when block size < PAGE_SIZE to track
> > + * Structure allocated for each page when block size < page size to track
> >   * sub-page uptodate status and I/O completions.
> 
> "for each regular page or head page of a huge page"?  Or whatever we're
> calling them nowadays?

Well, that's what I'm calling a "page" ;-)

How about "for each page or THP"?  The fact that it's stored in the
head page is incidental -- it's allocated for the THP.
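
(Illustrative, not part of the patch: a caller that might hold a tail page
would canonicalize to the head page before looking up the per-THP state, e.g.

	struct iomap_page *iop = to_iomap_page(compound_head(page));

since to_iomap_page() now asserts it is never handed a tail page.)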
Darrick J. Wong Aug. 26, 2020, 3:32 a.m. UTC | #5
On Wed, Aug 26, 2020 at 03:26:23AM +0100, Matthew Wilcox wrote:
> On Tue, Aug 25, 2020 at 02:02:03PM -0700, Darrick J. Wong wrote:
> > >  /*
> > > - * Structure allocated for each page when block size < PAGE_SIZE to track
> > > + * Structure allocated for each page when block size < page size to track
> > >   * sub-page uptodate status and I/O completions.
> > 
> > "for each regular page or head page of a huge page"?  Or whatever we're
> > calling them nowadays?
> 
> Well, that's what I'm calling a "page" ;-)
> 
> How about "for each page or THP"?  The fact that it's stored in the
> head page is incidental -- it's allocated for the THP.

Ok.

--D
Christoph Hellwig Aug. 27, 2020, 8:26 a.m. UTC | #6
>  static inline struct iomap_page *to_iomap_page(struct page *page)
>  {
> +	VM_BUG_ON_PGFLAGS(PageTail(page), page);
>  	if (page_has_private(page))
>  		return (struct iomap_page *)page_private(page);
>  	return NULL;

Nit: can you add an empty line after the VM_BUG_ON_PGFLAGS assert to
keep the function readable?  Maybe also add a comment on the assert,
as it isn't totally obvious.
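
Something like this, say (untested, just to show the shape):

	static inline struct iomap_page *to_iomap_page(struct page *page)
	{
		/* THPs attach the iomap_page to their head page only */
		VM_BUG_ON_PGFLAGS(PageTail(page), page);

		if (page_has_private(page))
			return (struct iomap_page *)page_private(page);
		return NULL;
	}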

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index dbf9572dabe9..844e95cacea8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -22,18 +22,19 @@ 
 #include "../internal.h"
 
 /*
- * Structure allocated for each page when block size < PAGE_SIZE to track
+ * Structure allocated for each page when block size < page size to track
  * sub-page uptodate status and I/O completions.
  */
 struct iomap_page {
 	atomic_t		read_count;
 	atomic_t		write_count;
 	spinlock_t		uptodate_lock;
-	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+	unsigned long		uptodate[];
 };
 
 static inline struct iomap_page *to_iomap_page(struct page *page)
 {
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
 	if (page_has_private(page))
 		return (struct iomap_page *)page_private(page);
 	return NULL;
@@ -45,11 +46,13 @@ static struct iomap_page *
 iomap_page_create(struct inode *inode, struct page *page)
 {
 	struct iomap_page *iop = to_iomap_page(page);
+	unsigned int nr_blocks = i_blocks_per_page(inode, page);
 
-	if (iop || i_blocks_per_page(inode, page) <= 1)
+	if (iop || nr_blocks <= 1)
 		return iop;
 
-	iop = kzalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
+	iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
+			GFP_NOFS | __GFP_NOFAIL);
 	spin_lock_init(&iop->uptodate_lock);
 	attach_page_private(page, iop);
 	return iop;
@@ -59,11 +62,14 @@ static void
 iomap_page_release(struct page *page)
 {
 	struct iomap_page *iop = detach_page_private(page);
+	unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
 
 	if (!iop)
 		return;
 	WARN_ON_ONCE(atomic_read(&iop->read_count));
 	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
+			PageUptodate(page));
 	kfree(iop);
 }