
[RFC,3/8] fs/buffer: restart block_read_full_folio() to avoid array overflow

Message ID 20241113094727.1497722-4-mcgrof@kernel.org (mailing list archive)
State New
Series enable bs > ps for block devices

Commit Message

Luis Chamberlain Nov. 13, 2024, 9:47 a.m. UTC
From: Hannes Reinecke <hare@suse.de>

block_read_full_folio() uses an on-stack array to hold any buffer_heads
which should be updated. The array is sized for the number of buffer_heads
per PAGE_SIZE, so it will overflow for large folios.
Instead of increasing the size of the array (and thereby incurring
a possible stack overflow for really large folios), stop the iteration
when the array is full, submit those buffer_heads, and restart
the iteration with the remaining buffer_heads.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 fs/buffer.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
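
A minimal, self-contained illustration of the batching scheme described
above (toy C, not the kernel code: struct bh, submit() and the hand-rolled
circular list are stand-ins for struct buffer_head, submit_bh() and
b_this_page):

#include <stdbool.h>
#include <stdio.h>

#define ARR_SIZE 8                      /* bounded on-stack batch, like arr[] */

struct bh {                             /* toy stand-in for struct buffer_head */
        int idx;
        bool uptodate;
        struct bh *b_this_page;         /* circular list, as in the kernel */
};

static void submit(struct bh *bh)       /* stand-in for submit_bh() */
{
        printf("submit read for block %d\n", bh->idx);
}

/*
 * Walk the circular list in batches of at most ARR_SIZE entries so the
 * on-stack array can never overflow, restarting where the previous batch
 * left off until the walk wraps back to head.
 */
static void read_full(struct bh *head)
{
        struct bh *arr[ARR_SIZE];
        struct bh *bh = head;

        do {
                int nr = 0;

                do {
                        if (!bh->uptodate)
                                arr[nr++] = bh;
                        bh = bh->b_this_page;
                } while (bh != head && nr < ARR_SIZE);

                for (int i = 0; i < nr; i++)
                        submit(arr[i]);
        } while (bh != head);           /* buffers left over: next batch */
}

int main(void)
{
        struct bh blocks[20];

        for (int i = 0; i < 20; i++) {
                blocks[i].idx = i;
                blocks[i].uptodate = (i % 5 == 0);      /* a few already valid */
                blocks[i].b_this_page = &blocks[(i + 1) % 20];
        }
        read_full(&blocks[0]);
        return 0;
}

However many buffers the folio has, the stack footprint stays at ARR_SIZE
pointers; the cost is one extra pass of the outer loop per batch.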

Comments

Matthew Wilcox Nov. 13, 2024, 6:50 p.m. UTC | #1
On Wed, Nov 13, 2024 at 01:47:22AM -0800, Luis Chamberlain wrote:
> +++ b/fs/buffer.c
> @@ -2366,7 +2366,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
>  {
>  	struct inode *inode = folio->mapping->host;
>  	sector_t iblock, lblock;
> -	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
> +	struct buffer_head *bh, *head, *restart_bh = NULL, *arr[MAX_BUF_PER_PAGE];

MAX_BUF_PER_PAGE is a pain.  There are configs like hexagon which have
256kB pages, so this array ends up being 512 * 8 bytes = 4kB in size,
which spews stack growth warnings.  Can we just make this 8?
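
For reference, MAX_BUF_PER_PAGE is PAGE_SIZE divided by 512 (the smallest
supported block size), so the number above follows directly; a throwaway
snippet with the 256kB hexagon page size hard-coded:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 256 * 1024;               /* hexagon config */
        unsigned long max_buf_per_page = page_size / 512;   /* 512 entries */
        unsigned long arr_bytes = max_buf_per_page * sizeof(void *);

        /* 512 * 8 = 4096 bytes of stack on a 64-bit build */
        printf("%lu entries * %zu bytes = %lu bytes\n",
               max_buf_per_page, sizeof(void *), arr_bytes);
        return 0;
}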

> @@ -2385,6 +2385,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
>  	iblock = div_u64(folio_pos(folio), blocksize);
>  	lblock = div_u64(limit + blocksize - 1, blocksize);
>  	bh = head;
> +restart:
>  	nr = 0;
>  	i = 0;
>  
> @@ -2417,7 +2418,12 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
>  				continue;
>  		}
>  		arr[nr++] = bh;
> -	} while (i++, iblock++, (bh = bh->b_this_page) != head);
> +	} while (i++, iblock++, (bh = bh->b_this_page) != head && nr < MAX_BUF_PER_PAGE);
> +
> +	if (nr == MAX_BUF_PER_PAGE && bh != head)
> +		restart_bh = bh;
> +	else
> +		restart_bh = NULL;
>  
>  	if (fully_mapped)
>  		folio_set_mappedtodisk(folio);
> @@ -2450,6 +2456,15 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
>  		else
>  			submit_bh(REQ_OP_READ, bh);
>  	}
> +
> +	/*
> +	 * Found more buffers than 'arr' could hold,
> +	 * restart to submit the remaining ones.
> +	 */
> +	if (restart_bh) {
> +		bh = restart_bh;
> +		goto restart;
> +	}
>  	return 0;

This isn't right.

Let's assume we need 16 blocks to fill in this folio and we have 8
entries in 'arr'.

        nr = 0;
        i = 0;

        do {
                if (buffer_uptodate(bh))
                        continue;
...
                arr[nr++] = bh;
        } while (i++, iblock++, (bh = bh->b_this_page) != head);

        for (i = 0; i < nr; i++) {
                bh = arr[i];
                if (buffer_uptodate(bh))
                        end_buffer_async_read(bh, 1);
                else
                        submit_bh(REQ_OP_READ, bh);
        }

OK, so first time round, we've submitted 8 I/Os.  Now we see that
restart_bh is not NULL and so we go round again.

This time, we happen to find that the last 8 BHs are uptodate.
And so we take this path:

        if (!nr) {
                /*
                 * All buffers are uptodate or get_block() returned an
                 * error when trying to map them - we can finish the read.
                 */
                folio_end_read(folio, !page_error);

oops, we forgot about the 8 buffers we already submitted for read.
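
One narrow way to avoid that premature folio_end_read(), shown here only as
an untested sketch (the submitted counter is hypothetical, not part of the
patch), is to remember across restart passes whether any I/O has already
been issued and take the !nr shortcut only when nothing has been submitted
yet:

        /* hypothetical counter, carried across the 'goto restart' passes */
        unsigned int submitted = 0;
...
restart:
        nr = 0;
        /* ... gather up to MAX_BUF_PER_PAGE not-uptodate buffer_heads ... */

        if (!nr) {
                /*
                 * Only finish the read here if no earlier pass submitted
                 * I/O; otherwise the async completion of those buffers is
                 * what must end the folio read.
                 */
                if (!submitted)
                        folio_end_read(folio, !page_error);
                return 0;
        }

        /* ... lock, mark async and submit the nr buffers as today ... */
        submitted += nr;

        if (restart_bh) {
                bh = restart_bh;
                goto restart;
        }
        return 0;

This only deals with the double completion above; whether a completion from
an already-submitted batch can race against buffers that a later pass has
not yet marked async read is a separate question the restart scheme still
needs to answer.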

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 1fc9a50def0b..818c9c5840fe 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2366,7 +2366,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 {
 	struct inode *inode = folio->mapping->host;
 	sector_t iblock, lblock;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	struct buffer_head *bh, *head, *restart_bh = NULL, *arr[MAX_BUF_PER_PAGE];
 	size_t blocksize;
 	int nr, i;
 	int fully_mapped = 1;
@@ -2385,6 +2385,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 	iblock = div_u64(folio_pos(folio), blocksize);
 	lblock = div_u64(limit + blocksize - 1, blocksize);
 	bh = head;
+restart:
 	nr = 0;
 	i = 0;
 
@@ -2417,7 +2418,12 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 				continue;
 		}
 		arr[nr++] = bh;
-	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+	} while (i++, iblock++, (bh = bh->b_this_page) != head && nr < MAX_BUF_PER_PAGE);
+
+	if (nr == MAX_BUF_PER_PAGE && bh != head)
+		restart_bh = bh;
+	else
+		restart_bh = NULL;
 
 	if (fully_mapped)
 		folio_set_mappedtodisk(folio);
@@ -2450,6 +2456,15 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 		else
 			submit_bh(REQ_OP_READ, bh);
 	}
+
+	/*
+	 * Found more buffers than 'arr' could hold,
+	 * restart to submit the remaining ones.
+	 */
+	if (restart_bh) {
+		bh = restart_bh;
+		goto restart;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(block_read_full_folio);