[02/10] ext2: Convert ext2_check_page to ext2_check_folio

Message ID	20230921200746.3303942-2-willy@infradead.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-fsdevel-owner@vger.kernel.org> From: "Matthew Wilcox (Oracle)" <willy@infradead.org> To: Jan Kara <jack@suse.com> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>, linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org, "Fabio M . De Francesco" <fmdefrancesco@gmail.com> Subject: [PATCH 02/10] ext2: Convert ext2_check_page to ext2_check_folio Date: Thu, 21 Sep 2023 21:07:39 +0100 Message-Id: <20230921200746.3303942-2-willy@infradead.org> In-Reply-To: <20230921200746.3303942-1-willy@infradead.org> References: <20230921200746.3303942-1-willy@infradead.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	[01/10] highmem: Add folio_release_kmap() \| expand [01/10] highmem: Add folio_release_kmap() [02/10] ext2: Convert ext2_check_page to ext2_check_folio [03/10] ext2: Add ext2_get_folio() [04/10] ext2: Convert ext2_readdir to use a folio [05/10] ext2: Convert ext2_add_link() to use a folio [06/10] ext2: Convert ext2_empty_dir() to use a folio [07/10] ext2: Handle large block size directories in ext2_delete_entry() [08/10] ext2: Convert ext2_unlink() and ext2_rename() to use folios [09/10] ext2: Convert ext2_make_empty() to use a folio

Message ID

20230921200746.3303942-2-willy@infradead.org (mailing list archive)

State

New, archived

Headers

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
To: Jan Kara <jack@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
        linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
        "Fabio M . De Francesco" <fmdefrancesco@gmail.com>
Subject: [PATCH 02/10] ext2: Convert ext2_check_page to ext2_check_folio
Date: Thu, 21 Sep 2023 21:07:39 +0100
Message-Id: <20230921200746.3303942-2-willy@infradead.org>
In-Reply-To: <20230921200746.3303942-1-willy@infradead.org>
References: <20230921200746.3303942-1-willy@infradead.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

[01/10] highmem: Add folio_release_kmap() | expand

Commit Message

Matthew Wilcox Sept. 21, 2023, 8:07 p.m. UTC

Support in this function for large folios is limited to supporting
filesystems with block size > PAGE_SIZE.  This new functionality will only
be supported on machines without HIGHMEM, so the problem of kmap_local
only being able to map a single page in the folio can be ignored.
We will not use large folios for ext2 directories on HIGHMEM machines.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ext2/dir.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

Comments

Jan Kara Oct. 3, 2023, 10:40 a.m. UTC | #1

On Thu 21-09-23 21:07:39, Matthew Wilcox (Oracle) wrote:
> Support in this function for large folios is limited to supporting
> filesystems with block size > PAGE_SIZE.  This new functionality will only
> be supported on machines without HIGHMEM, so the problem of kmap_local
> only being able to map a single page in the folio can be ignored.
> We will not use large folios for ext2 directories on HIGHMEM machines.

OK, but can we perhaps enforce this with some checks & error messages
instead of a silent failure? Like:

#ifdef CONFIG_HIGHMEM
	if (sb->s_blocksize > PAGE_SIZE)
		bail with error
#endif

somewhere in ext2_fill_super()? Or maybe force allocation of lowmem pages
when blocksize > PAGE_SIZE?

								Honza

> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/ext2/dir.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> index b335f17f682f..03867381eec2 100644
> --- a/fs/ext2/dir.c
> +++ b/fs/ext2/dir.c
> @@ -96,19 +96,19 @@ static void ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
>  	unlock_page(page);
>  }
>  
> -static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
> +static bool ext2_check_folio(struct folio *folio, int quiet, char *kaddr)
>  {
> -	struct inode *dir = page->mapping->host;
> +	struct inode *dir = folio->mapping->host;
>  	struct super_block *sb = dir->i_sb;
>  	unsigned chunk_size = ext2_chunk_size(dir);
>  	u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
>  	unsigned offs, rec_len;
> -	unsigned limit = PAGE_SIZE;
> +	unsigned limit = folio_size(folio);
>  	ext2_dirent *p;
>  	char *error;
>  
> -	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
> -		limit = dir->i_size & ~PAGE_MASK;
> +	if (dir->i_size < folio_pos(folio) + limit) {
> +		limit = offset_in_folio(folio, dir->i_size);
>  		if (limit & (chunk_size - 1))
>  			goto Ebadsize;
>  		if (!limit)
> @@ -132,7 +132,7 @@ static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
>  	if (offs != limit)
>  		goto Eend;
>  out:
> -	SetPageChecked(page);
> +	folio_set_checked(folio);
>  	return true;
>  
>  	/* Too bad, we had an error */
> @@ -160,22 +160,22 @@ static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
>  bad_entry:
>  	if (!quiet)
>  		ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
> -			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
> -			dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
> +			"offset=%llu, inode=%lu, rec_len=%d, name_len=%d",
> +			dir->i_ino, error, folio_pos(folio) + offs,
>  			(unsigned long) le32_to_cpu(p->inode),
>  			rec_len, p->name_len);
>  	goto fail;
>  Eend:
>  	if (!quiet) {
>  		p = (ext2_dirent *)(kaddr + offs);
> -		ext2_error(sb, "ext2_check_page",
> +		ext2_error(sb, "ext2_check_folio",
>  			"entry in directory #%lu spans the page boundary"
> -			"offset=%lu, inode=%lu",
> -			dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
> +			"offset=%llu, inode=%lu",
> +			dir->i_ino, folio_pos(folio) + offs,
>  			(unsigned long) le32_to_cpu(p->inode));
>  	}
>  fail:
> -	SetPageError(page);
> +	folio_set_error(folio);
>  	return false;
>  }
>  
> @@ -195,9 +195,9 @@ static void *ext2_get_page(struct inode *dir, unsigned long n,
>  
>  	if (IS_ERR(folio))
>  		return ERR_CAST(folio);
> -	page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
> +	page_addr = kmap_local_folio(folio, 0);
>  	if (unlikely(!folio_test_checked(folio))) {
> -		if (!ext2_check_page(&folio->page, quiet, page_addr))
> +		if (!ext2_check_folio(folio, quiet, page_addr))
>  			goto fail;
>  	}
>  	*page = &folio->page;
> -- 
> 2.40.1
>

Jan Kara Oct. 3, 2023, 10:52 a.m. UTC | #2

On Tue 03-10-23 12:40:17, Jan Kara wrote:
> On Thu 21-09-23 21:07:39, Matthew Wilcox (Oracle) wrote:
> > Support in this function for large folios is limited to supporting
> > filesystems with block size > PAGE_SIZE.  This new functionality will only
> > be supported on machines without HIGHMEM, so the problem of kmap_local
> > only being able to map a single page in the folio can be ignored.
> > We will not use large folios for ext2 directories on HIGHMEM machines.
> 
> OK, but can we perhaps enforce this with some checks & error messages
> instead of a silent failure? Like:
> 
> #ifdef CONFIG_HIGHMEM
> 	if (sb->s_blocksize > PAGE_SIZE)
> 		bail with error
> #endif
> 
> somewhere in ext2_fill_super()? Or maybe force allocation of lowmem pages
> when blocksize > PAGE_SIZE?
> 
> > @@ -195,9 +195,9 @@ static void *ext2_get_page(struct inode *dir, unsigned long n,
> >  
> >  	if (IS_ERR(folio))
> >  		return ERR_CAST(folio);
> > -	page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
> > +	page_addr = kmap_local_folio(folio, 0);

Oh, and I think this change breaks the code whenever we get back higher
order folio because the page_addr we get back is not for the page index
'n'.
								Honza

Jan Kara Oct. 3, 2023, 11:02 a.m. UTC | #3

On Thu 21-09-23 21:07:39, Matthew Wilcox (Oracle) wrote:
> Support in this function for large folios is limited to supporting
> filesystems with block size > PAGE_SIZE.  This new functionality will only
> be supported on machines without HIGHMEM, so the problem of kmap_local
> only being able to map a single page in the folio can be ignored.
> We will not use large folios for ext2 directories on HIGHMEM machines.

A look like a fool replying to the same patch multiple times ;) but how is
this supposed to work even without HIGHMEM? Suppose we have a page size 4k
and block size 16k. Directory entries in a block can then straddle 4k
boundary so if kmap_local() is mapping only a single page, then we are
going to have hard time parsing this all?

Oh, I guess you are pointing to the fact kmap_local_folio() gives you back
address usable for the whole folio access if !HIGHMEM. But then all the
iterations (e.g. in ext2_readdir()) has to be folio-size based and not
page-size based as they currently are? You didn't change these iterations
in your patches which has confused me...

								Honza

Matthew Wilcox Oct. 5, 2023, 6:27 p.m. UTC | #4

On Tue, Oct 03, 2023 at 12:52:24PM +0200, Jan Kara wrote:
> On Tue 03-10-23 12:40:17, Jan Kara wrote:
> > On Thu 21-09-23 21:07:39, Matthew Wilcox (Oracle) wrote:
> > > Support in this function for large folios is limited to supporting
> > > filesystems with block size > PAGE_SIZE.  This new functionality will only
> > > be supported on machines without HIGHMEM, so the problem of kmap_local
> > > only being able to map a single page in the folio can be ignored.
> > > We will not use large folios for ext2 directories on HIGHMEM machines.
> > 
> > OK, but can we perhaps enforce this with some checks & error messages
> > instead of a silent failure? Like:
> > 
> > #ifdef CONFIG_HIGHMEM
> > 	if (sb->s_blocksize > PAGE_SIZE)
> > 		bail with error
> > #endif
> > 
> > somewhere in ext2_fill_super()? Or maybe force allocation of lowmem pages
> > when blocksize > PAGE_SIZE?
> > 
> > > @@ -195,9 +195,9 @@ static void *ext2_get_page(struct inode *dir, unsigned long n,
> > >  
> > >  	if (IS_ERR(folio))
> > >  		return ERR_CAST(folio);
> > > -	page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
> > > +	page_addr = kmap_local_folio(folio, 0);
> 
> Oh, and I think this change breaks the code whenever we get back higher
> order folio because the page_addr we get back is not for the page index
> 'n'.

> A look like a fool replying to the same patch multiple times ;) but how is
> this supposed to work even without HIGHMEM? Suppose we have a page size 4k
> and block size 16k. Directory entries in a block can then straddle 4k
> boundary so if kmap_local() is mapping only a single page, then we are
> going to have hard time parsing this all?
> 
> Oh, I guess you are pointing to the fact kmap_local_folio() gives you back
> address usable for the whole folio access if !HIGHMEM. But then all the
> iterations (e.g. in ext2_readdir()) has to be folio-size based and not
> page-size based as they currently are? You didn't change these iterations
> in your patches which has confused me...

I'm going to reply to all three emails as one ;-)

The goal for this patchset is simply to do the folio conversion.
Earlier work was focused on "make sure everything behaves as it did
before", but now I've started to give some thought to "How will this
work" without actually doing the higher level work.

Today, ext2 does not permit large folios to be used for directory
inodes.  Whoever adds the call to mapping_set_large_folios() gets the
joy of finding all the places that contain assumptions and fixing them.
I don't particularly want to do that myself; I'm just trying to sort
out the underpinnings so that whoever does it has a somewhat sane
API to work against.

So I'm really just trying to do two things here:

 - Convert uses of struct page -> struct folio
 - Not leave any landmines for whoever comes after me

I had a quick look at converting ext2_readdir() to work for
large folios.  It's a bit intrusive, and I haven't done any
serious testing (let alone with the bs>PS patches).  But it could
look something like this:


diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6807df637112..5eeb57ce6a18 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -257,45 +257,45 @@ static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
 static int
 ext2_readdir(struct file *file, struct dir_context *ctx)
 {
-	loff_t pos = ctx->pos;
 	struct inode *inode = file_inode(file);
+	loff_t pos = ctx->pos;
+	loff_t isize = i_size_read(inode);
 	struct super_block *sb = inode->i_sb;
-	unsigned int offset = pos & ~PAGE_MASK;
-	unsigned long n = pos >> PAGE_SHIFT;
-	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
 	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 	bool has_filetype;
 
-	if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
+	if (pos > isize - EXT2_DIR_REC_LEN(1))
 		return 0;
 
 	has_filetype =
 		EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE);
 
-	for ( ; n < npages; n++, offset = 0) {
+	while (pos < isize) {
 		ext2_dirent *de;
 		struct folio *folio;
-		char *kaddr = ext2_get_folio(inode, n, 0, &folio);
+		char *kaddr = ext2_get_folio(inode, pos / PAGE_SIZE, 0, &folio);
+		size_t offset = offset_in_folio(folio, pos);
 		char *limit;
 
 		if (IS_ERR(kaddr)) {
 			ext2_error(sb, __func__,
 				   "bad page in #%lu",
 				   inode->i_ino);
-			ctx->pos += PAGE_SIZE - offset;
+			ctx->pos += folio_size(folio) - offset;
 			return PTR_ERR(kaddr);
 		}
 		if (unlikely(need_revalidate)) {
 			if (offset) {
 				offset = ext2_validate_entry(kaddr, offset, chunk_mask);
-				ctx->pos = (n<<PAGE_SHIFT) + offset;
+				ctx->pos = folio_pos(folio) + offset;
 			}
 			file->f_version = inode_query_iversion(inode);
 			need_revalidate = false;
 		}
 		de = (ext2_dirent *)(kaddr+offset);
-		limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
+		limit = kaddr + ext2_last_byte(inode, folio->index) -
+				EXT2_DIR_REC_LEN(1);
 		for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
 			if (de->rec_len == 0) {
 				ext2_error(sb, __func__,
@@ -319,6 +319,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
 			ctx->pos += ext2_rec_len_from_disk(de->rec_len);
 		}
 		folio_release_kmap(folio, kaddr);
+		pos = (loff_t)folio_next_index(folio) * PAGE_SIZE;
 	}
 	return 0;
 }

ext2_last_byte() is clearly now incorrect.  It should probably take
isize and folio as inputs and return either the last byte in the folio
or the last byte in the file.  But then we need to convert all the
callers, and I didn't want to do that for this demonstration patch.

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index b335f17f682f..03867381eec2 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -96,19 +96,19 @@  static void ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
 	unlock_page(page);
 }
 
-static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
+static bool ext2_check_folio(struct folio *folio, int quiet, char *kaddr)
 {
-	struct inode *dir = page->mapping->host;
+	struct inode *dir = folio->mapping->host;
 	struct super_block *sb = dir->i_sb;
 	unsigned chunk_size = ext2_chunk_size(dir);
 	u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
 	unsigned offs, rec_len;
-	unsigned limit = PAGE_SIZE;
+	unsigned limit = folio_size(folio);
 	ext2_dirent *p;
 	char *error;
 
-	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
-		limit = dir->i_size & ~PAGE_MASK;
+	if (dir->i_size < folio_pos(folio) + limit) {
+		limit = offset_in_folio(folio, dir->i_size);
 		if (limit & (chunk_size - 1))
 			goto Ebadsize;
 		if (!limit)
@@ -132,7 +132,7 @@  static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
 	if (offs != limit)
 		goto Eend;
 out:
-	SetPageChecked(page);
+	folio_set_checked(folio);
 	return true;
 
 	/* Too bad, we had an error */
@@ -160,22 +160,22 @@  static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
 bad_entry:
 	if (!quiet)
 		ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
-			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
-			dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
+			"offset=%llu, inode=%lu, rec_len=%d, name_len=%d",
+			dir->i_ino, error, folio_pos(folio) + offs,
 			(unsigned long) le32_to_cpu(p->inode),
 			rec_len, p->name_len);
 	goto fail;
 Eend:
 	if (!quiet) {
 		p = (ext2_dirent *)(kaddr + offs);
-		ext2_error(sb, "ext2_check_page",
+		ext2_error(sb, "ext2_check_folio",
 			"entry in directory #%lu spans the page boundary"
-			"offset=%lu, inode=%lu",
-			dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
+			"offset=%llu, inode=%lu",
+			dir->i_ino, folio_pos(folio) + offs,
 			(unsigned long) le32_to_cpu(p->inode));
 	}
 fail:
-	SetPageError(page);
+	folio_set_error(folio);
 	return false;
 }
 
@@ -195,9 +195,9 @@  static void *ext2_get_page(struct inode *dir, unsigned long n,
 
 	if (IS_ERR(folio))
 		return ERR_CAST(folio);
-	page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
+	page_addr = kmap_local_folio(folio, 0);
 	if (unlikely(!folio_test_checked(folio))) {
-		if (!ext2_check_page(&folio->page, quiet, page_addr))
+		if (!ext2_check_folio(folio, quiet, page_addr))
 			goto fail;
 	}
 	*page = &folio->page;

[02/10] ext2: Convert ext2_check_page to ext2_check_folio

Commit Message

Comments

Patch