diff mbox series

[11/35] nilfs2: Convert nilfs_page_mkwrite() to use a folio

Message ID 20231106173903.1734114-12-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series nilfs2: Folio conversions | expand

Commit Message

Matthew Wilcox Nov. 6, 2023, 5:38 p.m. UTC
Using the new folio APIs saves seven hidden calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/nilfs2/file.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

Comments

Ryusuke Konishi Nov. 9, 2023, 1:11 p.m. UTC | #1
On Tue, Nov 7, 2023 at 2:39 AM Matthew Wilcox (Oracle) wrote:
>
> Using the new folio APIs saves seven hidden calls to compound_head().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/nilfs2/file.c | 28 +++++++++++++++-------------
>  1 file changed, 15 insertions(+), 13 deletions(-)

I'm still in the middle of reviewing this series, but I had one
question in a relevant part outside of this patch, so I'd like to ask
you a question.

In block_page_mkwrite() that nilfs_page_mkwrite() calls,
__block_write_begin_int() was called with the range using
folio_size(), as shown below:

        end = folio_size(folio);
        /* folio is wholly or partially inside EOF */
        if (folio_pos(folio) + end > size)
                end = size - folio_pos(folio);

        ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
        ...

On the other hand, __block_write_begin_int() takes a folio as an
argument, but uses a PAGE_SIZE-based remainder calculation and BUG_ON
checks:

int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
                get_block_t *get_block, const struct iomap *iomap)
{
        unsigned from = pos & (PAGE_SIZE - 1);
        unsigned to = from + len;
        ...
        BUG_ON(from > PAGE_SIZE);
        BUG_ON(to > PAGE_SIZE);
        ...

So, it looks like this function causes a kernel BUG if it's called
from block_page_mkwrite() and folio_size() exceeds PAGE_SIZE.

Is this constraint intentional or temporary in folio conversions?

Regards,
Ryusuke Konishi

>
> diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
> index 740ce26d1e76..bec33b89a075 100644
> --- a/fs/nilfs2/file.c
> +++ b/fs/nilfs2/file.c
> @@ -45,34 +45,36 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
>  static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
>  {
>         struct vm_area_struct *vma = vmf->vma;
> -       struct page *page = vmf->page;
> +       struct folio *folio = page_folio(vmf->page);
>         struct inode *inode = file_inode(vma->vm_file);
>         struct nilfs_transaction_info ti;
> +       struct buffer_head *bh, *head;
>         int ret = 0;
>
>         if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
>                 return VM_FAULT_SIGBUS; /* -ENOSPC */
>
>         sb_start_pagefault(inode->i_sb);
> -       lock_page(page);
> -       if (page->mapping != inode->i_mapping ||
> -           page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
> -               unlock_page(page);
> +       folio_lock(folio);
> +       if (folio->mapping != inode->i_mapping ||
> +           folio_pos(folio) >= i_size_read(inode) ||
> +           !folio_test_uptodate(folio)) {
> +               folio_unlock(folio);
>                 ret = -EFAULT;  /* make the VM retry the fault */
>                 goto out;
>         }
>
>         /*
> -        * check to see if the page is mapped already (no holes)
> +        * check to see if the folio is mapped already (no holes)
>          */
> -       if (PageMappedToDisk(page))
> +       if (folio_test_mappedtodisk(folio))
>                 goto mapped;
>
> -       if (page_has_buffers(page)) {
> -               struct buffer_head *bh, *head;
> +       head = folio_buffers(folio);
> +       if (head) {
>                 int fully_mapped = 1;
>
> -               bh = head = page_buffers(page);
> +               bh = head;
>                 do {
>                         if (!buffer_mapped(bh)) {
>                                 fully_mapped = 0;
> @@ -81,11 +83,11 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
>                 } while (bh = bh->b_this_page, bh != head);
>
>                 if (fully_mapped) {
> -                       SetPageMappedToDisk(page);
> +                       folio_set_mappedtodisk(folio);
>                         goto mapped;
>                 }
>         }
> -       unlock_page(page);
> +       folio_unlock(folio);
>
>         /*
>          * fill hole blocks
> @@ -105,7 +107,7 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
>         nilfs_transaction_commit(inode->i_sb);
>
>   mapped:
> -       wait_for_stable_page(page);
> +       folio_wait_stable(folio);
>   out:
>         sb_end_pagefault(inode->i_sb);
>         return vmf_fs_error(ret);
> --
> 2.42.0
>
Matthew Wilcox Nov. 9, 2023, 1:37 p.m. UTC | #2
On Thu, Nov 09, 2023 at 10:11:27PM +0900, Ryusuke Konishi wrote:
> On Tue, Nov 7, 2023 at 2:39 AM Matthew Wilcox (Oracle) wrote:
> >
> > Using the new folio APIs saves seven hidden calls to compound_head().
> >
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  fs/nilfs2/file.c | 28 +++++++++++++++-------------
> >  1 file changed, 15 insertions(+), 13 deletions(-)
> 
> I'm still in the middle of reviewing this series, but I had one
> question in a relevant part outside of this patch, so I'd like to ask
> you a question.
> 
> In block_page_mkwrite() that nilfs_page_mkwrite() calls,
> __block_write_begin_int() was called with the range using
> folio_size(), as shown below:
> 
>         end = folio_size(folio);
>         /* folio is wholly or partially inside EOF */
>         if (folio_pos(folio) + end > size)
>                 end = size - folio_pos(folio);
> 
>         ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
>         ...
> 
> On the other hand, __block_write_begin_int() takes a folio as an
> argument, but uses a PAGE_SIZE-based remainder calculation and BUG_ON
> checks:
> 
> int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
>                 get_block_t *get_block, const struct iomap *iomap)
> {
>         unsigned from = pos & (PAGE_SIZE - 1);
>         unsigned to = from + len;
>         ...
>         BUG_ON(from > PAGE_SIZE);
>         BUG_ON(to > PAGE_SIZE);
>         ...
> 
> So, it looks like this function causes a kernel BUG if it's called
> from block_page_mkwrite() and folio_size() exceeds PAGE_SIZE.
> 
> Is this constraint intentional or temporary in folio conversions?

Good catch!

At the time I converted __block_write_begin_int() to take a folio
(over two years ago), the plan was that filesystems would convert to
the iomap infrastructure in order to take advantage of large folios.

The needs of the Large Block Size project mean that may not happen;
filesystems want to add support for, eg, 16kB hardware block sizes
without converting to iomap.  So we should fix up
__block_write_begin_int().  I'll submit a patch along these lines:

diff --git a/fs/buffer.c b/fs/buffer.c
index cd114110b27f..24a5694f9b41 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2080,25 +2080,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
 int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
 		get_block_t *get_block, const struct iomap *iomap)
 {
-	unsigned from = pos & (PAGE_SIZE - 1);
-	unsigned to = from + len;
+	size_t from = offset_in_folio(folio, pos);
+	size_t to = from + len;
 	struct inode *inode = folio->mapping->host;
-	unsigned block_start, block_end;
+	size_t block_start, block_end;
 	sector_t block;
 	int err = 0;
 	unsigned blocksize, bbits;
 	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
 
 	BUG_ON(!folio_test_locked(folio));
-	BUG_ON(from > PAGE_SIZE);
-	BUG_ON(to > PAGE_SIZE);
+	BUG_ON(to > folio_size(folio));
 	BUG_ON(from > to);
 
 	head = folio_create_buffers(folio, inode, 0);
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
 
-	block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
+	block = ((loff_t)folio->index * PAGE_SIZE) >> bbits;
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
 	    block++, block_start=block_end, bh = bh->b_this_page) {
Ryusuke Konishi Nov. 9, 2023, 2:22 p.m. UTC | #3
On Thu, Nov 9, 2023 at 10:37 PM Matthew Wilcox wrote:
>
> On Thu, Nov 09, 2023 at 10:11:27PM +0900, Ryusuke Konishi wrote:
> > On Tue, Nov 7, 2023 at 2:39 AM Matthew Wilcox (Oracle) wrote:
> > >
> > > Using the new folio APIs saves seven hidden calls to compound_head().
> > >
> > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > ---
> > >  fs/nilfs2/file.c | 28 +++++++++++++++-------------
> > >  1 file changed, 15 insertions(+), 13 deletions(-)
> >
> > I'm still in the middle of reviewing this series, but I had one
> > question in a relevant part outside of this patch, so I'd like to ask
> > you a question.
> >
> > In block_page_mkwrite() that nilfs_page_mkwrite() calls,
> > __block_write_begin_int() was called with the range using
> > folio_size(), as shown below:
> >
> >         end = folio_size(folio);
> >         /* folio is wholly or partially inside EOF */
> >         if (folio_pos(folio) + end > size)
> >                 end = size - folio_pos(folio);
> >
> >         ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
> >         ...
> >
> > On the other hand, __block_write_begin_int() takes a folio as an
> > argument, but uses a PAGE_SIZE-based remainder calculation and BUG_ON
> > checks:
> >
> > int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
> >                 get_block_t *get_block, const struct iomap *iomap)
> > {
> >         unsigned from = pos & (PAGE_SIZE - 1);
> >         unsigned to = from + len;
> >         ...
> >         BUG_ON(from > PAGE_SIZE);
> >         BUG_ON(to > PAGE_SIZE);
> >         ...
> >
> > So, it looks like this function causes a kernel BUG if it's called
> > from block_page_mkwrite() and folio_size() exceeds PAGE_SIZE.
> >
> > Is this constraint intentional or temporary in folio conversions?
>
> Good catch!
>
> At the time I converted __block_write_begin_int() to take a folio
> (over two years ago), the plan was that filesystems would convert to
> the iomap infrastructure in order to take advantage of large folios.
>
> The needs of the Large Block Size project mean that may not happen;
> filesystems want to add support for, eg, 16kB hardware block sizes
> without converting to iomap.  So we should fix up
> __block_write_begin_int().  I'll submit a patch along these lines:
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index cd114110b27f..24a5694f9b41 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -2080,25 +2080,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
>  int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
>                 get_block_t *get_block, const struct iomap *iomap)
>  {
> -       unsigned from = pos & (PAGE_SIZE - 1);
> -       unsigned to = from + len;
> +       size_t from = offset_in_folio(folio, pos);
> +       size_t to = from + len;
>         struct inode *inode = folio->mapping->host;
> -       unsigned block_start, block_end;
> +       size_t block_start, block_end;
>         sector_t block;
>         int err = 0;
>         unsigned blocksize, bbits;
>         struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
>
>         BUG_ON(!folio_test_locked(folio));
> -       BUG_ON(from > PAGE_SIZE);
> -       BUG_ON(to > PAGE_SIZE);
> +       BUG_ON(to > folio_size(folio));
>         BUG_ON(from > to);
>
>         head = folio_create_buffers(folio, inode, 0);
>         blocksize = head->b_size;
>         bbits = block_size_bits(blocksize);
>
> -       block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
> +       block = ((loff_t)folio->index * PAGE_SIZE) >> bbits;
>
>         for(bh = head, block_start = 0; bh != head || !block_start;
>             block++, block_start=block_end, bh = bh->b_this_page) {

I got it.
We may have to look at the entire function, but if this change is
applied, it seems to be fine, at least for my question.

Thank you for your prompt correction!

Regards,
Ryusuke Konishi
diff mbox series

Patch

diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 740ce26d1e76..bec33b89a075 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -45,34 +45,36 @@  int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct inode *inode = file_inode(vma->vm_file);
 	struct nilfs_transaction_info ti;
+	struct buffer_head *bh, *head;
 	int ret = 0;
 
 	if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
 		return VM_FAULT_SIGBUS; /* -ENOSPC */
 
 	sb_start_pagefault(inode->i_sb);
-	lock_page(page);
-	if (page->mapping != inode->i_mapping ||
-	    page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
-		unlock_page(page);
+	folio_lock(folio);
+	if (folio->mapping != inode->i_mapping ||
+	    folio_pos(folio) >= i_size_read(inode) ||
+	    !folio_test_uptodate(folio)) {
+		folio_unlock(folio);
 		ret = -EFAULT;	/* make the VM retry the fault */
 		goto out;
 	}
 
 	/*
-	 * check to see if the page is mapped already (no holes)
+	 * check to see if the folio is mapped already (no holes)
 	 */
-	if (PageMappedToDisk(page))
+	if (folio_test_mappedtodisk(folio))
 		goto mapped;
 
-	if (page_has_buffers(page)) {
-		struct buffer_head *bh, *head;
+	head = folio_buffers(folio);
+	if (head) {
 		int fully_mapped = 1;
 
-		bh = head = page_buffers(page);
+		bh = head;
 		do {
 			if (!buffer_mapped(bh)) {
 				fully_mapped = 0;
@@ -81,11 +83,11 @@  static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
 		} while (bh = bh->b_this_page, bh != head);
 
 		if (fully_mapped) {
-			SetPageMappedToDisk(page);
+			folio_set_mappedtodisk(folio);
 			goto mapped;
 		}
 	}
-	unlock_page(page);
+	folio_unlock(folio);
 
 	/*
 	 * fill hole blocks
@@ -105,7 +107,7 @@  static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
 	nilfs_transaction_commit(inode->i_sb);
 
  mapped:
-	wait_for_stable_page(page);
+	folio_wait_stable(folio);
  out:
 	sb_end_pagefault(inode->i_sb);
 	return vmf_fs_error(ret);