[f2fs-dev] f2fs: Introduce linear search for dentries

Message ID	20241217075552.1167994-1-chullee@google.com (mailing list archive)
State	Superseded
Headers	show Return-Path: <linux-f2fs-devel-bounces@lists.sourceforge.net> Date: Mon, 16 Dec 2024 23:55:52 -0800 Mime-Version: 1.0 Message-ID: <20241217075552.1167994-1-chullee@google.com> To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org> Subject: [f2fs-dev] [PATCH] f2fs: Introduce linear search for dentries Precedence: list From: Daniel Lee via Linux-f2fs-devel <linux-f2fs-devel@lists.sourceforge.net> Reply-To: Daniel Lee <chullee@google.com> Cc: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: linux-f2fs-devel-bounces@lists.sourceforge.net
Series	[f2fs-dev] f2fs: Introduce linear search for dentries \| expand [f2fs-dev] f2fs: Introduce linear search for dentries

Message ID

20241217075552.1167994-1-chullee@google.com (mailing list archive)

State

Superseded

Headers

Date: Mon, 16 Dec 2024 23:55:52 -0800
Mime-Version: 1.0
Message-ID: <20241217075552.1167994-1-chullee@google.com>
To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>
Subject: [f2fs-dev] [PATCH] f2fs: Introduce linear search for dentries
Precedence: list
From: Daniel Lee via Linux-f2fs-devel <linux-f2fs-devel@lists.sourceforge.net>
Reply-To: Daniel Lee <chullee@google.com>
Cc: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: linux-f2fs-devel-bounces@lists.sourceforge.net

Series

[f2fs-dev] f2fs: Introduce linear search for dentries | expand

Commit Message

Daniel Lee Dec. 17, 2024, 7:55 a.m. UTC

This patch addresses an issue where some files in case-insensitive
directories become inaccessible due to changes in how the kernel function,
utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
("unicode: Don't special case ignorable code points").

F2FS uses these case-folded names to calculate hash values for locating
dentries and stores them on disk. Since utf8_casefold() can produce
different output across kernel versions, stored hash values and newly
calculated hash values may differ. This results in affected files no
longer being found via the hash-based lookup.

To resolve this, the patch introduces a linear search fallback.
If the initial hash-based search fails, F2FS will sequentially scan the
directory entries.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
Signed-off-by: Daniel Lee <chullee@google.com>
---
 fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
 fs/f2fs/f2fs.h   |  6 ++++--
 fs/f2fs/inline.c |  5 +++--
 3 files changed, 35 insertions(+), 14 deletions(-)

Comments

Chao Yu Dec. 19, 2024, 1:29 p.m. UTC | #1

Hi Daniel,

On 2024/12/17 15:55, Daniel Lee wrote:
> This patch addresses an issue where some files in case-insensitive
> directories become inaccessible due to changes in how the kernel function,
> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> ("unicode: Don't special case ignorable code points").
> 
> F2FS uses these case-folded names to calculate hash values for locating
> dentries and stores them on disk. Since utf8_casefold() can produce
> different output across kernel versions, stored hash values and newly
> calculated hash values may differ. This results in affected files no
> longer being found via the hash-based lookup.
> 
> To resolve this, the patch introduces a linear search fallback.
> If the initial hash-based search fails, F2FS will sequentially scan the
> directory entries.
> 

Need a fixes line?

> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> Signed-off-by: Daniel Lee <chullee@google.com>
> ---
>   fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
>   fs/f2fs/f2fs.h   |  6 ++++--
>   fs/f2fs/inline.c |  5 +++--
>   3 files changed, 35 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 47a5c806cf16..a85d19b4e178 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
>   static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>   				struct page *dentry_page,
>   				const struct f2fs_filename *fname,
> -				int *max_slots)
> +				int *max_slots,
> +				bool use_hash)
>   {
>   	struct f2fs_dentry_block *dentry_blk;
>   	struct f2fs_dentry_ptr d;
> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>   	dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
>   
>   	make_dentry_ptr_block(dir, &d, dentry_blk);
> -	return f2fs_find_target_dentry(&d, fname, max_slots);
> +	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
>   }
>   
>   static inline int f2fs_match_name(const struct inode *dir,
> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
>   }
>   
>   struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots)
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash)
>   {
>   	struct f2fs_dir_entry *de;
>   	unsigned long bit_pos = 0;
> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>   			continue;
>   		}
>   
> -		if (de->hash_code == fname->hash) {
> +		if (!use_hash || de->hash_code == fname->hash) {
>   			res = f2fs_match_name(d->inode, fname,
>   					      d->filename[bit_pos],
>   					      le16_to_cpu(de->name_len));
> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>   static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>   					unsigned int level,
>   					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>   {
>   	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
>   	unsigned int nbucket, nblock;
> -	unsigned int bidx, end_block;
> +	unsigned int bidx, end_block, bucket_no;
>   	struct page *dentry_page;
>   	struct f2fs_dir_entry *de = NULL;
>   	pgoff_t next_pgofs;
> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>   	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
>   	nblock = bucket_blocks(level);
>   
> +	bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> +
> +start_find_bucket:
>   	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> -			       le32_to_cpu(fname->hash) % nbucket);
> +			       bucket_no);
>   	end_block = bidx + nblock;
>   
>   	while (bidx < end_block) {
> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>   			}
>   		}
>   
> -		de = find_in_block(dir, dentry_page, fname, &max_slots);
> +		de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
>   		if (IS_ERR(de)) {
>   			*res_page = ERR_CAST(de);
>   			de = NULL;
> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>   		bidx++;
>   	}
>   
> +	if (!use_hash && !de && ++bucket_no < nbucket)
> +		goto start_find_bucket;
> +
>   	if (!de && room && F2FS_I(dir)->chash != fname->hash) {

Do we need to avoid accessing or assigning hash if use_hash is false?

Thanks,

>   		F2FS_I(dir)->chash = fname->hash;
>   		F2FS_I(dir)->clevel = level;
> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>   	struct f2fs_dir_entry *de = NULL;
>   	unsigned int max_depth;
>   	unsigned int level;
> +	bool use_hash = true;
>   
>   	*res_page = NULL;
>   
> +start_find_entry:
>   	if (f2fs_has_inline_dentry(dir)) {
> -		de = f2fs_find_in_inline_dir(dir, fname, res_page);
> +		de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
>   		goto out;
>   	}
>   
> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>   	}
>   
>   	for (level = 0; level < max_depth; level++) {
> -		de = find_in_level(dir, level, fname, res_page);
> +		de = find_in_level(dir, level, fname, res_page, use_hash);
>   		if (de || IS_ERR(*res_page))
>   			break;
>   	}
> +
>   out:
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (IS_CASEFOLDED(dir) && !de && use_hash) {
> +		use_hash = false;
> +		goto start_find_entry;
> +	}
> +#endif
>   	/* This is to increase the speed of f2fs_create */
>   	if (!de)
>   		F2FS_I(dir)->task = current;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f523dd302bf6..1afebb9c4061 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
>   			struct f2fs_filename *fname);
>   void f2fs_free_filename(struct f2fs_filename *fname);
>   struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots);
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash);
>   int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
>   			unsigned int start_pos, struct fscrypt_str *fstr);
>   void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
>   int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
>   struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>   					const struct f2fs_filename *fname,
> -					struct page **res_page);
> +					struct page **res_page,
> +					bool use_hash);
>   int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>   			struct page *ipage);
>   int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index cbd2a0d34804..3e3c35d4c98b 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>   
>   struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>   					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>   {
>   	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
>   	struct f2fs_dir_entry *de;
> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>   	inline_dentry = inline_data_addr(dir, ipage);
>   
>   	make_dentry_ptr_inline(dir, &d, inline_dentry);
> -	de = f2fs_find_target_dentry(&d, fname, NULL);
> +	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
>   	unlock_page(ipage);
>   	if (IS_ERR(de)) {
>   		*res_page = ERR_CAST(de);

Daniel Lee Dec. 20, 2024, 4:59 a.m. UTC | #2

On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote:
>
> Hi Daniel,
>
> On 2024/12/17 15:55, Daniel Lee wrote:
> > This patch addresses an issue where some files in case-insensitive
> > directories become inaccessible due to changes in how the kernel function,
> > utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> > ("unicode: Don't special case ignorable code points").
> >
> > F2FS uses these case-folded names to calculate hash values for locating
> > dentries and stores them on disk. Since utf8_casefold() can produce
> > different output across kernel versions, stored hash values and newly
> > calculated hash values may differ. This results in affected files no
> > longer being found via the hash-based lookup.
> >
> > To resolve this, the patch introduces a linear search fallback.
> > If the initial hash-based search fails, F2FS will sequentially scan the
> > directory entries.
> >
>
> Need a fixes line?

Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes:

>
> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> > Signed-off-by: Daniel Lee <chullee@google.com>
> > ---
> >   fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
> >   fs/f2fs/f2fs.h   |  6 ++++--
> >   fs/f2fs/inline.c |  5 +++--
> >   3 files changed, 35 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> > index 47a5c806cf16..a85d19b4e178 100644
> > --- a/fs/f2fs/dir.c
> > +++ b/fs/f2fs/dir.c
> > @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
> >   static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >                               struct page *dentry_page,
> >                               const struct f2fs_filename *fname,
> > -                             int *max_slots)
> > +                             int *max_slots,
> > +                             bool use_hash)
> >   {
> >       struct f2fs_dentry_block *dentry_blk;
> >       struct f2fs_dentry_ptr d;
> > @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >       dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
> >
> >       make_dentry_ptr_block(dir, &d, dentry_blk);
> > -     return f2fs_find_target_dentry(&d, fname, max_slots);
> > +     return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
> >   }
> >
> >   static inline int f2fs_match_name(const struct inode *dir,
> > @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
> >   }
> >
> >   struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> > -                     const struct f2fs_filename *fname, int *max_slots)
> > +                     const struct f2fs_filename *fname, int *max_slots,
> > +                     bool use_hash)
> >   {
> >       struct f2fs_dir_entry *de;
> >       unsigned long bit_pos = 0;
> > @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >                       continue;
> >               }
> >
> > -             if (de->hash_code == fname->hash) {
> > +             if (!use_hash || de->hash_code == fname->hash) {
> >                       res = f2fs_match_name(d->inode, fname,
> >                                             d->filename[bit_pos],
> >                                             le16_to_cpu(de->name_len));
> > @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >   static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >                                       unsigned int level,
> >                                       const struct f2fs_filename *fname,
> > -                                     struct page **res_page)
> > +                                     struct page **res_page,
> > +                                     bool use_hash)
> >   {
> >       int s = GET_DENTRY_SLOTS(fname->disk_name.len);
> >       unsigned int nbucket, nblock;
> > -     unsigned int bidx, end_block;
> > +     unsigned int bidx, end_block, bucket_no;
> >       struct page *dentry_page;
> >       struct f2fs_dir_entry *de = NULL;
> >       pgoff_t next_pgofs;
> > @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >       nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
> >       nblock = bucket_blocks(level);
> >
> > +     bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> > +
> > +start_find_bucket:
> >       bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> > -                            le32_to_cpu(fname->hash) % nbucket);
> > +                            bucket_no);
> >       end_block = bidx + nblock;
> >
> >       while (bidx < end_block) {
> > @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >                       }
> >               }
> >
> > -             de = find_in_block(dir, dentry_page, fname, &max_slots);
> > +             de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
> >               if (IS_ERR(de)) {
> >                       *res_page = ERR_CAST(de);
> >                       de = NULL;
> > @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >               bidx++;
> >       }
> >
> > +     if (!use_hash && !de && ++bucket_no < nbucket)
> > +             goto start_find_bucket;
> > +
> >       if (!de && room && F2FS_I(dir)->chash != fname->hash) {
>
> Do we need to avoid accessing or assigning hash if use_hash is false?
>

Is it still worth keeping the hash for the creation if both hash-based
and linear searches failed?

> Thanks,
>
> >               F2FS_I(dir)->chash = fname->hash;
> >               F2FS_I(dir)->clevel = level;
> > @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >       struct f2fs_dir_entry *de = NULL;
> >       unsigned int max_depth;
> >       unsigned int level;
> > +     bool use_hash = true;
> >
> >       *res_page = NULL;
> >
> > +start_find_entry:
> >       if (f2fs_has_inline_dentry(dir)) {
> > -             de = f2fs_find_in_inline_dir(dir, fname, res_page);
> > +             de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
> >               goto out;
> >       }
> >
> > @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >       }
> >
> >       for (level = 0; level < max_depth; level++) {
> > -             de = find_in_level(dir, level, fname, res_page);
> > +             de = find_in_level(dir, level, fname, res_page, use_hash);
> >               if (de || IS_ERR(*res_page))
> >                       break;
> >       }
> > +
> >   out:
> > +#if IS_ENABLED(CONFIG_UNICODE)
> > +     if (IS_CASEFOLDED(dir) && !de && use_hash) {
> > +             use_hash = false;
> > +             goto start_find_entry;
> > +     }
> > +#endif
> >       /* This is to increase the speed of f2fs_create */
> >       if (!de)
> >               F2FS_I(dir)->task = current;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index f523dd302bf6..1afebb9c4061 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
> >                       struct f2fs_filename *fname);
> >   void f2fs_free_filename(struct f2fs_filename *fname);
> >   struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> > -                     const struct f2fs_filename *fname, int *max_slots);
> > +                     const struct f2fs_filename *fname, int *max_slots,
> > +                     bool use_hash);
> >   int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
> >                       unsigned int start_pos, struct fscrypt_str *fstr);
> >   void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> > @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
> >   int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
> >   struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >                                       const struct f2fs_filename *fname,
> > -                                     struct page **res_page);
> > +                                     struct page **res_page,
> > +                                     bool use_hash);
> >   int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >                       struct page *ipage);
> >   int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> > diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> > index cbd2a0d34804..3e3c35d4c98b 100644
> > --- a/fs/f2fs/inline.c
> > +++ b/fs/f2fs/inline.c
> > @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >
> >   struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >                                       const struct f2fs_filename *fname,
> > -                                     struct page **res_page)
> > +                                     struct page **res_page,
> > +                                     bool use_hash)
> >   {
> >       struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
> >       struct f2fs_dir_entry *de;
> > @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >       inline_dentry = inline_data_addr(dir, ipage);
> >
> >       make_dentry_ptr_inline(dir, &d, inline_dentry);
> > -     de = f2fs_find_target_dentry(&d, fname, NULL);
> > +     de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
> >       unlock_page(ipage);
> >       if (IS_ERR(de)) {
> >               *res_page = ERR_CAST(de);
>

Chao Yu Dec. 20, 2024, 1:25 p.m. UTC | #3

On 2024/12/20 12:59, Daniel Lee wrote:
> On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote:
>>
>> Hi Daniel,
>>
>> On 2024/12/17 15:55, Daniel Lee wrote:
>>> This patch addresses an issue where some files in case-insensitive
>>> directories become inaccessible due to changes in how the kernel function,
>>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
>>> ("unicode: Don't special case ignorable code points").
>>>
>>> F2FS uses these case-folded names to calculate hash values for locating
>>> dentries and stores them on disk. Since utf8_casefold() can produce
>>> different output across kernel versions, stored hash values and newly
>>> calculated hash values may differ. This results in affected files no
>>> longer being found via the hash-based lookup.
>>>
>>> To resolve this, the patch introduces a linear search fallback.
>>> If the initial hash-based search fails, F2FS will sequentially scan the
>>> directory entries.
>>>
>>
>> Need a fixes line?
> 
> Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes:
> 
>>
>>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
>>> Signed-off-by: Daniel Lee <chullee@google.com>
>>> ---
>>>    fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
>>>    fs/f2fs/f2fs.h   |  6 ++++--
>>>    fs/f2fs/inline.c |  5 +++--
>>>    3 files changed, 35 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>> index 47a5c806cf16..a85d19b4e178 100644
>>> --- a/fs/f2fs/dir.c
>>> +++ b/fs/f2fs/dir.c
>>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
>>>    static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>>>                                struct page *dentry_page,
>>>                                const struct f2fs_filename *fname,
>>> -                             int *max_slots)
>>> +                             int *max_slots,
>>> +                             bool use_hash)
>>>    {
>>>        struct f2fs_dentry_block *dentry_blk;
>>>        struct f2fs_dentry_ptr d;
>>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>>>        dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
>>>
>>>        make_dentry_ptr_block(dir, &d, dentry_blk);
>>> -     return f2fs_find_target_dentry(&d, fname, max_slots);
>>> +     return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
>>>    }
>>>
>>>    static inline int f2fs_match_name(const struct inode *dir,
>>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
>>>    }
>>>
>>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>> -                     const struct f2fs_filename *fname, int *max_slots)
>>> +                     const struct f2fs_filename *fname, int *max_slots,
>>> +                     bool use_hash)
>>>    {
>>>        struct f2fs_dir_entry *de;
>>>        unsigned long bit_pos = 0;
>>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>>                        continue;
>>>                }
>>>
>>> -             if (de->hash_code == fname->hash) {
>>> +             if (!use_hash || de->hash_code == fname->hash) {
>>>                        res = f2fs_match_name(d->inode, fname,
>>>                                              d->filename[bit_pos],
>>>                                              le16_to_cpu(de->name_len));
>>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>>    static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                                        unsigned int level,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page)
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash)
>>>    {
>>>        int s = GET_DENTRY_SLOTS(fname->disk_name.len);
>>>        unsigned int nbucket, nblock;
>>> -     unsigned int bidx, end_block;
>>> +     unsigned int bidx, end_block, bucket_no;
>>>        struct page *dentry_page;
>>>        struct f2fs_dir_entry *de = NULL;
>>>        pgoff_t next_pgofs;
>>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>        nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
>>>        nblock = bucket_blocks(level);
>>>
>>> +     bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
>>> +
>>> +start_find_bucket:
>>>        bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
>>> -                            le32_to_cpu(fname->hash) % nbucket);
>>> +                            bucket_no);
>>>        end_block = bidx + nblock;
>>>
>>>        while (bidx < end_block) {
>>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                        }
>>>                }
>>>
>>> -             de = find_in_block(dir, dentry_page, fname, &max_slots);
>>> +             de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
>>>                if (IS_ERR(de)) {
>>>                        *res_page = ERR_CAST(de);
>>>                        de = NULL;
>>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                bidx++;
>>>        }
>>>
>>> +     if (!use_hash && !de && ++bucket_no < nbucket)
>>> +             goto start_find_bucket;
>>> +
>>>        if (!de && room && F2FS_I(dir)->chash != fname->hash) {
>>
>> Do we need to avoid accessing or assigning hash if use_hash is false?
>>
> 
> Is it still worth keeping the hash for the creation if both hash-based
> and linear searches failed?

It needs to be kept for hash-based searching failure? since it was added to
enhance performance of file creation:
- open(..., O_CREAT)
  - f2fs_lookup
  : didn't find target dirent, then, record last chash & clevel
  - f2fs_create
  : create dirent in clevel bucket once chash matches

Thanks,

> 
>> Thanks,
>>
>>>                F2FS_I(dir)->chash = fname->hash;
>>>                F2FS_I(dir)->clevel = level;
>>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>>>        struct f2fs_dir_entry *de = NULL;
>>>        unsigned int max_depth;
>>>        unsigned int level;
>>> +     bool use_hash = true;
>>>
>>>        *res_page = NULL;
>>>
>>> +start_find_entry:
>>>        if (f2fs_has_inline_dentry(dir)) {
>>> -             de = f2fs_find_in_inline_dir(dir, fname, res_page);
>>> +             de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
>>>                goto out;
>>>        }
>>>
>>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>>>        }
>>>
>>>        for (level = 0; level < max_depth; level++) {
>>> -             de = find_in_level(dir, level, fname, res_page);
>>> +             de = find_in_level(dir, level, fname, res_page, use_hash);
>>>                if (de || IS_ERR(*res_page))
>>>                        break;
>>>        }
>>> +
>>>    out:
>>> +#if IS_ENABLED(CONFIG_UNICODE)
>>> +     if (IS_CASEFOLDED(dir) && !de && use_hash) {
>>> +             use_hash = false;
>>> +             goto start_find_entry;
>>> +     }
>>> +#endif
>>>        /* This is to increase the speed of f2fs_create */
>>>        if (!de)
>>>                F2FS_I(dir)->task = current;
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index f523dd302bf6..1afebb9c4061 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
>>>                        struct f2fs_filename *fname);
>>>    void f2fs_free_filename(struct f2fs_filename *fname);
>>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>> -                     const struct f2fs_filename *fname, int *max_slots);
>>> +                     const struct f2fs_filename *fname, int *max_slots,
>>> +                     bool use_hash);
>>>    int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
>>>                        unsigned int start_pos, struct fscrypt_str *fstr);
>>>    void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
>>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
>>>    int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
>>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page);
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash);
>>>    int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>                        struct page *ipage);
>>>    int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>> index cbd2a0d34804..3e3c35d4c98b 100644
>>> --- a/fs/f2fs/inline.c
>>> +++ b/fs/f2fs/inline.c
>>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>>
>>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page)
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash)
>>>    {
>>>        struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
>>>        struct f2fs_dir_entry *de;
>>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>        inline_dentry = inline_data_addr(dir, ipage);
>>>
>>>        make_dentry_ptr_inline(dir, &d, inline_dentry);
>>> -     de = f2fs_find_target_dentry(&d, fname, NULL);
>>> +     de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
>>>        unlock_page(ipage);
>>>        if (IS_ERR(de)) {
>>>                *res_page = ERR_CAST(de);
>>

Daniel Lee Dec. 20, 2024, 5:04 p.m. UTC | #4

On Fri, Dec 20, 2024 at 5:25 AM Chao Yu <chao@kernel.org> wrote:
>
> On 2024/12/20 12:59, Daniel Lee wrote:
> > On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote:
> >>
> >> Hi Daniel,
> >>
> >> On 2024/12/17 15:55, Daniel Lee wrote:
> >>> This patch addresses an issue where some files in case-insensitive
> >>> directories become inaccessible due to changes in how the kernel function,
> >>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> >>> ("unicode: Don't special case ignorable code points").
> >>>
> >>> F2FS uses these case-folded names to calculate hash values for locating
> >>> dentries and stores them on disk. Since utf8_casefold() can produce
> >>> different output across kernel versions, stored hash values and newly
> >>> calculated hash values may differ. This results in affected files no
> >>> longer being found via the hash-based lookup.
> >>>
> >>> To resolve this, the patch introduces a linear search fallback.
> >>> If the initial hash-based search fails, F2FS will sequentially scan the
> >>> directory entries.
> >>>
> >>
> >> Need a fixes line?
> >
> > Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes:
> >
> >>
> >>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> >>> Signed-off-by: Daniel Lee <chullee@google.com>
> >>> ---
> >>>    fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
> >>>    fs/f2fs/f2fs.h   |  6 ++++--
> >>>    fs/f2fs/inline.c |  5 +++--
> >>>    3 files changed, 35 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >>> index 47a5c806cf16..a85d19b4e178 100644
> >>> --- a/fs/f2fs/dir.c
> >>> +++ b/fs/f2fs/dir.c
> >>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
> >>>    static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >>>                                struct page *dentry_page,
> >>>                                const struct f2fs_filename *fname,
> >>> -                             int *max_slots)
> >>> +                             int *max_slots,
> >>> +                             bool use_hash)
> >>>    {
> >>>        struct f2fs_dentry_block *dentry_blk;
> >>>        struct f2fs_dentry_ptr d;
> >>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >>>        dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
> >>>
> >>>        make_dentry_ptr_block(dir, &d, dentry_blk);
> >>> -     return f2fs_find_target_dentry(&d, fname, max_slots);
> >>> +     return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
> >>>    }
> >>>
> >>>    static inline int f2fs_match_name(const struct inode *dir,
> >>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
> >>>    }
> >>>
> >>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> -                     const struct f2fs_filename *fname, int *max_slots)
> >>> +                     const struct f2fs_filename *fname, int *max_slots,
> >>> +                     bool use_hash)
> >>>    {
> >>>        struct f2fs_dir_entry *de;
> >>>        unsigned long bit_pos = 0;
> >>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>>                        continue;
> >>>                }
> >>>
> >>> -             if (de->hash_code == fname->hash) {
> >>> +             if (!use_hash || de->hash_code == fname->hash) {
> >>>                        res = f2fs_match_name(d->inode, fname,
> >>>                                              d->filename[bit_pos],
> >>>                                              le16_to_cpu(de->name_len));
> >>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>>    static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>>                                        unsigned int level,
> >>>                                        const struct f2fs_filename *fname,
> >>> -                                     struct page **res_page)
> >>> +                                     struct page **res_page,
> >>> +                                     bool use_hash)
> >>>    {
> >>>        int s = GET_DENTRY_SLOTS(fname->disk_name.len);
> >>>        unsigned int nbucket, nblock;
> >>> -     unsigned int bidx, end_block;
> >>> +     unsigned int bidx, end_block, bucket_no;
> >>>        struct page *dentry_page;
> >>>        struct f2fs_dir_entry *de = NULL;
> >>>        pgoff_t next_pgofs;
> >>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>>        nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
> >>>        nblock = bucket_blocks(level);
> >>>
> >>> +     bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> >>> +
> >>> +start_find_bucket:
> >>>        bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> >>> -                            le32_to_cpu(fname->hash) % nbucket);
> >>> +                            bucket_no);
> >>>        end_block = bidx + nblock;
> >>>
> >>>        while (bidx < end_block) {
> >>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>>                        }
> >>>                }
> >>>
> >>> -             de = find_in_block(dir, dentry_page, fname, &max_slots);
> >>> +             de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
> >>>                if (IS_ERR(de)) {
> >>>                        *res_page = ERR_CAST(de);
> >>>                        de = NULL;
> >>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>>                bidx++;
> >>>        }
> >>>
> >>> +     if (!use_hash && !de && ++bucket_no < nbucket)
> >>> +             goto start_find_bucket;
> >>> +
> >>>        if (!de && room && F2FS_I(dir)->chash != fname->hash) {
> >>
> >> Do we need to avoid accessing or assigning hash if use_hash is false?
> >>
> >
> > Is it still worth keeping the hash for the creation if both hash-based
> > and linear searches failed?
>
> It needs to be kept for hash-based searching failure? since it was added to
> enhance performance of file creation:
> - open(..., O_CREAT)
>   - f2fs_lookup
>   : didn't find target dirent, then, record last chash & clevel
>   - f2fs_create
>   : create dirent in clevel bucket once chash matches
>

I understand your point and I will revise it accordingly.

> Thanks,
>
> >
> >> Thanks,
> >>
> >>>                F2FS_I(dir)->chash = fname->hash;
> >>>                F2FS_I(dir)->clevel = level;
> >>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >>>        struct f2fs_dir_entry *de = NULL;
> >>>        unsigned int max_depth;
> >>>        unsigned int level;
> >>> +     bool use_hash = true;
> >>>
> >>>        *res_page = NULL;
> >>>
> >>> +start_find_entry:
> >>>        if (f2fs_has_inline_dentry(dir)) {
> >>> -             de = f2fs_find_in_inline_dir(dir, fname, res_page);
> >>> +             de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
> >>>                goto out;
> >>>        }
> >>>
> >>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >>>        }
> >>>
> >>>        for (level = 0; level < max_depth; level++) {
> >>> -             de = find_in_level(dir, level, fname, res_page);
> >>> +             de = find_in_level(dir, level, fname, res_page, use_hash);
> >>>                if (de || IS_ERR(*res_page))
> >>>                        break;
> >>>        }
> >>> +
> >>>    out:
> >>> +#if IS_ENABLED(CONFIG_UNICODE)
> >>> +     if (IS_CASEFOLDED(dir) && !de && use_hash) {
> >>> +             use_hash = false;
> >>> +             goto start_find_entry;
> >>> +     }
> >>> +#endif
> >>>        /* This is to increase the speed of f2fs_create */
> >>>        if (!de)
> >>>                F2FS_I(dir)->task = current;
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index f523dd302bf6..1afebb9c4061 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
> >>>                        struct f2fs_filename *fname);
> >>>    void f2fs_free_filename(struct f2fs_filename *fname);
> >>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> -                     const struct f2fs_filename *fname, int *max_slots);
> >>> +                     const struct f2fs_filename *fname, int *max_slots,
> >>> +                     bool use_hash);
> >>>    int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
> >>>                        unsigned int start_pos, struct fscrypt_str *fstr);
> >>>    void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> >>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
> >>>    int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
> >>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>>                                        const struct f2fs_filename *fname,
> >>> -                                     struct page **res_page);
> >>> +                                     struct page **res_page,
> >>> +                                     bool use_hash);
> >>>    int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >>>                        struct page *ipage);
> >>>    int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> >>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>> index cbd2a0d34804..3e3c35d4c98b 100644
> >>> --- a/fs/f2fs/inline.c
> >>> +++ b/fs/f2fs/inline.c
> >>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >>>
> >>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>>                                        const struct f2fs_filename *fname,
> >>> -                                     struct page **res_page)
> >>> +                                     struct page **res_page,
> >>> +                                     bool use_hash)
> >>>    {
> >>>        struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
> >>>        struct f2fs_dir_entry *de;
> >>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>>        inline_dentry = inline_data_addr(dir, ipage);
> >>>
> >>>        make_dentry_ptr_inline(dir, &d, inline_dentry);
> >>> -     de = f2fs_find_target_dentry(&d, fname, NULL);
> >>> +     de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
> >>>        unlock_page(ipage);
> >>>        if (IS_ERR(de)) {
> >>>                *res_page = ERR_CAST(de);
> >>
>

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 47a5c806cf16..a85d19b4e178 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -175,7 +175,8 @@  static unsigned long dir_block_index(unsigned int level,
 static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 				struct page *dentry_page,
 				const struct f2fs_filename *fname,
-				int *max_slots)
+				int *max_slots,
+				bool use_hash)
 {
 	struct f2fs_dentry_block *dentry_blk;
 	struct f2fs_dentry_ptr d;
@@ -183,7 +184,7 @@  static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 	dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
 
 	make_dentry_ptr_block(dir, &d, dentry_blk);
-	return f2fs_find_target_dentry(&d, fname, max_slots);
+	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
 }
 
 static inline int f2fs_match_name(const struct inode *dir,
@@ -208,7 +209,8 @@  static inline int f2fs_match_name(const struct inode *dir,
 }
 
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
-			const struct f2fs_filename *fname, int *max_slots)
+			const struct f2fs_filename *fname, int *max_slots,
+			bool use_hash)
 {
 	struct f2fs_dir_entry *de;
 	unsigned long bit_pos = 0;
@@ -231,7 +233,7 @@  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			continue;
 		}
 
-		if (de->hash_code == fname->hash) {
+		if (!use_hash || de->hash_code == fname->hash) {
 			res = f2fs_match_name(d->inode, fname,
 					      d->filename[bit_pos],
 					      le16_to_cpu(de->name_len));
@@ -258,11 +260,12 @@  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 					unsigned int level,
 					const struct f2fs_filename *fname,
-					struct page **res_page)
+					struct page **res_page,
+					bool use_hash)
 {
 	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
 	unsigned int nbucket, nblock;
-	unsigned int bidx, end_block;
+	unsigned int bidx, end_block, bucket_no;
 	struct page *dentry_page;
 	struct f2fs_dir_entry *de = NULL;
 	pgoff_t next_pgofs;
@@ -272,8 +275,11 @@  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
 	nblock = bucket_blocks(level);
 
+	bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
+
+start_find_bucket:
 	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
-			       le32_to_cpu(fname->hash) % nbucket);
+			       bucket_no);
 	end_block = bidx + nblock;
 
 	while (bidx < end_block) {
@@ -290,7 +296,7 @@  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			}
 		}
 
-		de = find_in_block(dir, dentry_page, fname, &max_slots);
+		de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
 		if (IS_ERR(de)) {
 			*res_page = ERR_CAST(de);
 			de = NULL;
@@ -307,6 +313,9 @@  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		bidx++;
 	}
 
+	if (!use_hash && !de && ++bucket_no < nbucket)
+		goto start_find_bucket;
+
 	if (!de && room && F2FS_I(dir)->chash != fname->hash) {
 		F2FS_I(dir)->chash = fname->hash;
 		F2FS_I(dir)->clevel = level;
@@ -323,11 +332,13 @@  struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	struct f2fs_dir_entry *de = NULL;
 	unsigned int max_depth;
 	unsigned int level;
+	bool use_hash = true;
 
 	*res_page = NULL;
 
+start_find_entry:
 	if (f2fs_has_inline_dentry(dir)) {
-		de = f2fs_find_in_inline_dir(dir, fname, res_page);
+		de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
 		goto out;
 	}
 
@@ -343,11 +354,18 @@  struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	}
 
 	for (level = 0; level < max_depth; level++) {
-		de = find_in_level(dir, level, fname, res_page);
+		de = find_in_level(dir, level, fname, res_page, use_hash);
 		if (de || IS_ERR(*res_page))
 			break;
 	}
+
 out:
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (IS_CASEFOLDED(dir) && !de && use_hash) {
+		use_hash = false;
+		goto start_find_entry;
+	}
+#endif
 	/* This is to increase the speed of f2fs_create */
 	if (!de)
 		F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f523dd302bf6..1afebb9c4061 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3588,7 +3588,8 @@  int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
 			struct f2fs_filename *fname);
 void f2fs_free_filename(struct f2fs_filename *fname);
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
-			const struct f2fs_filename *fname, int *max_slots);
+			const struct f2fs_filename *fname, int *max_slots,
+			bool use_hash);
 int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			unsigned int start_pos, struct fscrypt_str *fstr);
 void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
@@ -4224,7 +4225,8 @@  int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
 int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 					const struct f2fs_filename *fname,
-					struct page **res_page);
+					struct page **res_page,
+					bool use_hash);
 int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
 			struct page *ipage);
 int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index cbd2a0d34804..3e3c35d4c98b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -352,7 +352,8 @@  int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
 
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 					const struct f2fs_filename *fname,
-					struct page **res_page)
+					struct page **res_page,
+					bool use_hash)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct f2fs_dir_entry *de;
@@ -369,7 +370,7 @@  struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 	inline_dentry = inline_data_addr(dir, ipage);
 
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
-	de = f2fs_find_target_dentry(&d, fname, NULL);
+	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
 	unlock_page(ipage);
 	if (IS_ERR(de)) {
 		*res_page = ERR_CAST(de);

[f2fs-dev] f2fs: Introduce linear search for dentries

Commit Message

Comments

Patch