Message ID | 20241217075552.1167994-1-chullee@google.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [f2fs-dev] f2fs: Introduce linear search for dentries | expand |
Hi Daniel, On 2024/12/17 15:55, Daniel Lee wrote: > This patch addresses an issue where some files in case-insensitive > directories become inaccessible due to changes in how the kernel function, > utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 > ("unicode: Don't special case ignorable code points"). > > F2FS uses these case-folded names to calculate hash values for locating > dentries and stores them on disk. Since utf8_casefold() can produce > different output across kernel versions, stored hash values and newly > calculated hash values may differ. This results in affected files no > longer being found via the hash-based lookup. > > To resolve this, the patch introduces a linear search fallback. > If the initial hash-based search fails, F2FS will sequentially scan the > directory entries. > Need a fixes line? > Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 > Signed-off-by: Daniel Lee <chullee@google.com> > --- > fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++---------- > fs/f2fs/f2fs.h | 6 ++++-- > fs/f2fs/inline.c | 5 +++-- > 3 files changed, 35 insertions(+), 14 deletions(-) > > diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c > index 47a5c806cf16..a85d19b4e178 100644 > --- a/fs/f2fs/dir.c > +++ b/fs/f2fs/dir.c > @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, > static struct f2fs_dir_entry *find_in_block(struct inode *dir, > struct page *dentry_page, > const struct f2fs_filename *fname, > - int *max_slots) > + int *max_slots, > + bool use_hash) > { > struct f2fs_dentry_block *dentry_blk; > struct f2fs_dentry_ptr d; > @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, > dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); > > make_dentry_ptr_block(dir, &d, dentry_blk); > - return f2fs_find_target_dentry(&d, fname, max_slots); > + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); > } > > static inline int f2fs_match_name(const struct inode *dir, > @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, > } > > struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > - const struct f2fs_filename *fname, int *max_slots) > + const struct f2fs_filename *fname, int *max_slots, > + bool use_hash) > { > struct f2fs_dir_entry *de; > unsigned long bit_pos = 0; > @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > continue; > } > > - if (de->hash_code == fname->hash) { > + if (!use_hash || de->hash_code == fname->hash) { > res = f2fs_match_name(d->inode, fname, > d->filename[bit_pos], > le16_to_cpu(de->name_len)); > @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > static struct f2fs_dir_entry *find_in_level(struct inode *dir, > unsigned int level, > const struct f2fs_filename *fname, > - struct page **res_page) > + struct page **res_page, > + bool use_hash) > { > int s = GET_DENTRY_SLOTS(fname->disk_name.len); > unsigned int nbucket, nblock; > - unsigned int bidx, end_block; > + unsigned int bidx, end_block, bucket_no; > struct page *dentry_page; > struct f2fs_dir_entry *de = NULL; > pgoff_t next_pgofs; > @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); > nblock = bucket_blocks(level); > > + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; > + > +start_find_bucket: > bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, > - le32_to_cpu(fname->hash) % nbucket); > + bucket_no); > end_block = bidx + nblock; > > while (bidx < end_block) { > @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > } > } > > - de = find_in_block(dir, dentry_page, fname, &max_slots); > + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); > if (IS_ERR(de)) { > *res_page = ERR_CAST(de); > de = NULL; > @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > bidx++; > } > > + if (!use_hash && !de && ++bucket_no < nbucket) > + goto start_find_bucket; > + > if (!de && room && F2FS_I(dir)->chash != fname->hash) { Do we need to avoid accessing or assigning hash if use_hash is false? Thanks, > F2FS_I(dir)->chash = fname->hash; > F2FS_I(dir)->clevel = level; > @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > struct f2fs_dir_entry *de = NULL; > unsigned int max_depth; > unsigned int level; > + bool use_hash = true; > > *res_page = NULL; > > +start_find_entry: > if (f2fs_has_inline_dentry(dir)) { > - de = f2fs_find_in_inline_dir(dir, fname, res_page); > + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); > goto out; > } > > @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > } > > for (level = 0; level < max_depth; level++) { > - de = find_in_level(dir, level, fname, res_page); > + de = find_in_level(dir, level, fname, res_page, use_hash); > if (de || IS_ERR(*res_page)) > break; > } > + > out: > +#if IS_ENABLED(CONFIG_UNICODE) > + if (IS_CASEFOLDED(dir) && !de && use_hash) { > + use_hash = false; > + goto start_find_entry; > + } > +#endif > /* This is to increase the speed of f2fs_create */ > if (!de) > F2FS_I(dir)->task = current; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index f523dd302bf6..1afebb9c4061 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, > struct f2fs_filename *fname); > void f2fs_free_filename(struct f2fs_filename *fname); > struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > - const struct f2fs_filename *fname, int *max_slots); > + const struct f2fs_filename *fname, int *max_slots, > + bool use_hash); > int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, > unsigned int start_pos, struct fscrypt_str *fstr); > void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, > @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); > int f2fs_recover_inline_data(struct inode *inode, struct page *npage); > struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > const struct f2fs_filename *fname, > - struct page **res_page); > + struct page **res_page, > + bool use_hash); > int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, > struct page *ipage); > int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, > diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c > index cbd2a0d34804..3e3c35d4c98b 100644 > --- a/fs/f2fs/inline.c > +++ b/fs/f2fs/inline.c > @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) > > struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > const struct f2fs_filename *fname, > - struct page **res_page) > + struct page **res_page, > + bool use_hash) > { > struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); > struct f2fs_dir_entry *de; > @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > inline_dentry = inline_data_addr(dir, ipage); > > make_dentry_ptr_inline(dir, &d, inline_dentry); > - de = f2fs_find_target_dentry(&d, fname, NULL); > + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); > unlock_page(ipage); > if (IS_ERR(de)) { > *res_page = ERR_CAST(de);
On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote: > > Hi Daniel, > > On 2024/12/17 15:55, Daniel Lee wrote: > > This patch addresses an issue where some files in case-insensitive > > directories become inaccessible due to changes in how the kernel function, > > utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 > > ("unicode: Don't special case ignorable code points"). > > > > F2FS uses these case-folded names to calculate hash values for locating > > dentries and stores them on disk. Since utf8_casefold() can produce > > different output across kernel versions, stored hash values and newly > > calculated hash values may differ. This results in affected files no > > longer being found via the hash-based lookup. > > > > To resolve this, the patch introduces a linear search fallback. > > If the initial hash-based search fails, F2FS will sequentially scan the > > directory entries. > > > > Need a fixes line? Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes: > > > Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 > > Signed-off-by: Daniel Lee <chullee@google.com> > > --- > > fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++---------- > > fs/f2fs/f2fs.h | 6 ++++-- > > fs/f2fs/inline.c | 5 +++-- > > 3 files changed, 35 insertions(+), 14 deletions(-) > > > > diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c > > index 47a5c806cf16..a85d19b4e178 100644 > > --- a/fs/f2fs/dir.c > > +++ b/fs/f2fs/dir.c > > @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, > > static struct f2fs_dir_entry *find_in_block(struct inode *dir, > > struct page *dentry_page, > > const struct f2fs_filename *fname, > > - int *max_slots) > > + int *max_slots, > > + bool use_hash) > > { > > struct f2fs_dentry_block *dentry_blk; > > struct f2fs_dentry_ptr d; > > @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, > > dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); > > > > make_dentry_ptr_block(dir, &d, dentry_blk); > > - return f2fs_find_target_dentry(&d, fname, max_slots); > > + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); > > } > > > > static inline int f2fs_match_name(const struct inode *dir, > > @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, > > } > > > > struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > > - const struct f2fs_filename *fname, int *max_slots) > > + const struct f2fs_filename *fname, int *max_slots, > > + bool use_hash) > > { > > struct f2fs_dir_entry *de; > > unsigned long bit_pos = 0; > > @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > > continue; > > } > > > > - if (de->hash_code == fname->hash) { > > + if (!use_hash || de->hash_code == fname->hash) { > > res = f2fs_match_name(d->inode, fname, > > d->filename[bit_pos], > > le16_to_cpu(de->name_len)); > > @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > > static struct f2fs_dir_entry *find_in_level(struct inode *dir, > > unsigned int level, > > const struct f2fs_filename *fname, > > - struct page **res_page) > > + struct page **res_page, > > + bool use_hash) > > { > > int s = GET_DENTRY_SLOTS(fname->disk_name.len); > > unsigned int nbucket, nblock; > > - unsigned int bidx, end_block; > > + unsigned int bidx, end_block, bucket_no; > > struct page *dentry_page; > > struct f2fs_dir_entry *de = NULL; > > pgoff_t next_pgofs; > > @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > > nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); > > nblock = bucket_blocks(level); > > > > + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; > > + > > +start_find_bucket: > > bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, > > - le32_to_cpu(fname->hash) % nbucket); > > + bucket_no); > > end_block = bidx + nblock; > > > > while (bidx < end_block) { > > @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > > } > > } > > > > - de = find_in_block(dir, dentry_page, fname, &max_slots); > > + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); > > if (IS_ERR(de)) { > > *res_page = ERR_CAST(de); > > de = NULL; > > @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > > bidx++; > > } > > > > + if (!use_hash && !de && ++bucket_no < nbucket) > > + goto start_find_bucket; > > + > > if (!de && room && F2FS_I(dir)->chash != fname->hash) { > > Do we need to avoid accessing or assigning hash if use_hash is false? > Is it still worth keeping the hash for the creation if both hash-based and linear searches failed? > Thanks, > > > F2FS_I(dir)->chash = fname->hash; > > F2FS_I(dir)->clevel = level; > > @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > > struct f2fs_dir_entry *de = NULL; > > unsigned int max_depth; > > unsigned int level; > > + bool use_hash = true; > > > > *res_page = NULL; > > > > +start_find_entry: > > if (f2fs_has_inline_dentry(dir)) { > > - de = f2fs_find_in_inline_dir(dir, fname, res_page); > > + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); > > goto out; > > } > > > > @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > > } > > > > for (level = 0; level < max_depth; level++) { > > - de = find_in_level(dir, level, fname, res_page); > > + de = find_in_level(dir, level, fname, res_page, use_hash); > > if (de || IS_ERR(*res_page)) > > break; > > } > > + > > out: > > +#if IS_ENABLED(CONFIG_UNICODE) > > + if (IS_CASEFOLDED(dir) && !de && use_hash) { > > + use_hash = false; > > + goto start_find_entry; > > + } > > +#endif > > /* This is to increase the speed of f2fs_create */ > > if (!de) > > F2FS_I(dir)->task = current; > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index f523dd302bf6..1afebb9c4061 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, > > struct f2fs_filename *fname); > > void f2fs_free_filename(struct f2fs_filename *fname); > > struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > > - const struct f2fs_filename *fname, int *max_slots); > > + const struct f2fs_filename *fname, int *max_slots, > > + bool use_hash); > > int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, > > unsigned int start_pos, struct fscrypt_str *fstr); > > void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, > > @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); > > int f2fs_recover_inline_data(struct inode *inode, struct page *npage); > > struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > > const struct f2fs_filename *fname, > > - struct page **res_page); > > + struct page **res_page, > > + bool use_hash); > > int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, > > struct page *ipage); > > int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, > > diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c > > index cbd2a0d34804..3e3c35d4c98b 100644 > > --- a/fs/f2fs/inline.c > > +++ b/fs/f2fs/inline.c > > @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) > > > > struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > > const struct f2fs_filename *fname, > > - struct page **res_page) > > + struct page **res_page, > > + bool use_hash) > > { > > struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); > > struct f2fs_dir_entry *de; > > @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > > inline_dentry = inline_data_addr(dir, ipage); > > > > make_dentry_ptr_inline(dir, &d, inline_dentry); > > - de = f2fs_find_target_dentry(&d, fname, NULL); > > + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); > > unlock_page(ipage); > > if (IS_ERR(de)) { > > *res_page = ERR_CAST(de); >
On 2024/12/20 12:59, Daniel Lee wrote: > On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote: >> >> Hi Daniel, >> >> On 2024/12/17 15:55, Daniel Lee wrote: >>> This patch addresses an issue where some files in case-insensitive >>> directories become inaccessible due to changes in how the kernel function, >>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 >>> ("unicode: Don't special case ignorable code points"). >>> >>> F2FS uses these case-folded names to calculate hash values for locating >>> dentries and stores them on disk. Since utf8_casefold() can produce >>> different output across kernel versions, stored hash values and newly >>> calculated hash values may differ. This results in affected files no >>> longer being found via the hash-based lookup. >>> >>> To resolve this, the patch introduces a linear search fallback. >>> If the initial hash-based search fails, F2FS will sequentially scan the >>> directory entries. >>> >> >> Need a fixes line? > > Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes: > >> >>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 >>> Signed-off-by: Daniel Lee <chullee@google.com> >>> --- >>> fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++---------- >>> fs/f2fs/f2fs.h | 6 ++++-- >>> fs/f2fs/inline.c | 5 +++-- >>> 3 files changed, 35 insertions(+), 14 deletions(-) >>> >>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c >>> index 47a5c806cf16..a85d19b4e178 100644 >>> --- a/fs/f2fs/dir.c >>> +++ b/fs/f2fs/dir.c >>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, >>> static struct f2fs_dir_entry *find_in_block(struct inode *dir, >>> struct page *dentry_page, >>> const struct f2fs_filename *fname, >>> - int *max_slots) >>> + int *max_slots, >>> + bool use_hash) >>> { >>> struct f2fs_dentry_block *dentry_blk; >>> struct f2fs_dentry_ptr d; >>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, >>> dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); >>> >>> make_dentry_ptr_block(dir, &d, dentry_blk); >>> - return f2fs_find_target_dentry(&d, fname, max_slots); >>> + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); >>> } >>> >>> static inline int f2fs_match_name(const struct inode *dir, >>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, >>> } >>> >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, >>> - const struct f2fs_filename *fname, int *max_slots) >>> + const struct f2fs_filename *fname, int *max_slots, >>> + bool use_hash) >>> { >>> struct f2fs_dir_entry *de; >>> unsigned long bit_pos = 0; >>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, >>> continue; >>> } >>> >>> - if (de->hash_code == fname->hash) { >>> + if (!use_hash || de->hash_code == fname->hash) { >>> res = f2fs_match_name(d->inode, fname, >>> d->filename[bit_pos], >>> le16_to_cpu(de->name_len)); >>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, >>> static struct f2fs_dir_entry *find_in_level(struct inode *dir, >>> unsigned int level, >>> const struct f2fs_filename *fname, >>> - struct page **res_page) >>> + struct page **res_page, >>> + bool use_hash) >>> { >>> int s = GET_DENTRY_SLOTS(fname->disk_name.len); >>> unsigned int nbucket, nblock; >>> - unsigned int bidx, end_block; >>> + unsigned int bidx, end_block, bucket_no; >>> struct page *dentry_page; >>> struct f2fs_dir_entry *de = NULL; >>> pgoff_t next_pgofs; >>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, >>> nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); >>> nblock = bucket_blocks(level); >>> >>> + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; >>> + >>> +start_find_bucket: >>> bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, >>> - le32_to_cpu(fname->hash) % nbucket); >>> + bucket_no); >>> end_block = bidx + nblock; >>> >>> while (bidx < end_block) { >>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, >>> } >>> } >>> >>> - de = find_in_block(dir, dentry_page, fname, &max_slots); >>> + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); >>> if (IS_ERR(de)) { >>> *res_page = ERR_CAST(de); >>> de = NULL; >>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, >>> bidx++; >>> } >>> >>> + if (!use_hash && !de && ++bucket_no < nbucket) >>> + goto start_find_bucket; >>> + >>> if (!de && room && F2FS_I(dir)->chash != fname->hash) { >> >> Do we need to avoid accessing or assigning hash if use_hash is false? >> > > Is it still worth keeping the hash for the creation if both hash-based > and linear searches failed? It needs to be kept for hash-based searching failure? since it was added to enhance performance of file creation: - open(..., O_CREAT) - f2fs_lookup : didn't find target dirent, then, record last chash & clevel - f2fs_create : create dirent in clevel bucket once chash matches Thanks, > >> Thanks, >> >>> F2FS_I(dir)->chash = fname->hash; >>> F2FS_I(dir)->clevel = level; >>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, >>> struct f2fs_dir_entry *de = NULL; >>> unsigned int max_depth; >>> unsigned int level; >>> + bool use_hash = true; >>> >>> *res_page = NULL; >>> >>> +start_find_entry: >>> if (f2fs_has_inline_dentry(dir)) { >>> - de = f2fs_find_in_inline_dir(dir, fname, res_page); >>> + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); >>> goto out; >>> } >>> >>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, >>> } >>> >>> for (level = 0; level < max_depth; level++) { >>> - de = find_in_level(dir, level, fname, res_page); >>> + de = find_in_level(dir, level, fname, res_page, use_hash); >>> if (de || IS_ERR(*res_page)) >>> break; >>> } >>> + >>> out: >>> +#if IS_ENABLED(CONFIG_UNICODE) >>> + if (IS_CASEFOLDED(dir) && !de && use_hash) { >>> + use_hash = false; >>> + goto start_find_entry; >>> + } >>> +#endif >>> /* This is to increase the speed of f2fs_create */ >>> if (!de) >>> F2FS_I(dir)->task = current; >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>> index f523dd302bf6..1afebb9c4061 100644 >>> --- a/fs/f2fs/f2fs.h >>> +++ b/fs/f2fs/f2fs.h >>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, >>> struct f2fs_filename *fname); >>> void f2fs_free_filename(struct f2fs_filename *fname); >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, >>> - const struct f2fs_filename *fname, int *max_slots); >>> + const struct f2fs_filename *fname, int *max_slots, >>> + bool use_hash); >>> int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, >>> unsigned int start_pos, struct fscrypt_str *fstr); >>> void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, >>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); >>> int f2fs_recover_inline_data(struct inode *inode, struct page *npage); >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, >>> const struct f2fs_filename *fname, >>> - struct page **res_page); >>> + struct page **res_page, >>> + bool use_hash); >>> int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, >>> struct page *ipage); >>> int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, >>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c >>> index cbd2a0d34804..3e3c35d4c98b 100644 >>> --- a/fs/f2fs/inline.c >>> +++ b/fs/f2fs/inline.c >>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) >>> >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, >>> const struct f2fs_filename *fname, >>> - struct page **res_page) >>> + struct page **res_page, >>> + bool use_hash) >>> { >>> struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); >>> struct f2fs_dir_entry *de; >>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, >>> inline_dentry = inline_data_addr(dir, ipage); >>> >>> make_dentry_ptr_inline(dir, &d, inline_dentry); >>> - de = f2fs_find_target_dentry(&d, fname, NULL); >>> + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); >>> unlock_page(ipage); >>> if (IS_ERR(de)) { >>> *res_page = ERR_CAST(de); >>
On Fri, Dec 20, 2024 at 5:25 AM Chao Yu <chao@kernel.org> wrote: > > On 2024/12/20 12:59, Daniel Lee wrote: > > On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote: > >> > >> Hi Daniel, > >> > >> On 2024/12/17 15:55, Daniel Lee wrote: > >>> This patch addresses an issue where some files in case-insensitive > >>> directories become inaccessible due to changes in how the kernel function, > >>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 > >>> ("unicode: Don't special case ignorable code points"). > >>> > >>> F2FS uses these case-folded names to calculate hash values for locating > >>> dentries and stores them on disk. Since utf8_casefold() can produce > >>> different output across kernel versions, stored hash values and newly > >>> calculated hash values may differ. This results in affected files no > >>> longer being found via the hash-based lookup. > >>> > >>> To resolve this, the patch introduces a linear search fallback. > >>> If the initial hash-based search fails, F2FS will sequentially scan the > >>> directory entries. > >>> > >> > >> Need a fixes line? > > > > Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes: > > > >> > >>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 > >>> Signed-off-by: Daniel Lee <chullee@google.com> > >>> --- > >>> fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++---------- > >>> fs/f2fs/f2fs.h | 6 ++++-- > >>> fs/f2fs/inline.c | 5 +++-- > >>> 3 files changed, 35 insertions(+), 14 deletions(-) > >>> > >>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c > >>> index 47a5c806cf16..a85d19b4e178 100644 > >>> --- a/fs/f2fs/dir.c > >>> +++ b/fs/f2fs/dir.c > >>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, > >>> static struct f2fs_dir_entry *find_in_block(struct inode *dir, > >>> struct page *dentry_page, > >>> const struct f2fs_filename *fname, > >>> - int *max_slots) > >>> + int *max_slots, > >>> + bool use_hash) > >>> { > >>> struct f2fs_dentry_block *dentry_blk; > >>> struct f2fs_dentry_ptr d; > >>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, > >>> dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); > >>> > >>> make_dentry_ptr_block(dir, &d, dentry_blk); > >>> - return f2fs_find_target_dentry(&d, fname, max_slots); > >>> + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); > >>> } > >>> > >>> static inline int f2fs_match_name(const struct inode *dir, > >>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, > >>> } > >>> > >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > >>> - const struct f2fs_filename *fname, int *max_slots) > >>> + const struct f2fs_filename *fname, int *max_slots, > >>> + bool use_hash) > >>> { > >>> struct f2fs_dir_entry *de; > >>> unsigned long bit_pos = 0; > >>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > >>> continue; > >>> } > >>> > >>> - if (de->hash_code == fname->hash) { > >>> + if (!use_hash || de->hash_code == fname->hash) { > >>> res = f2fs_match_name(d->inode, fname, > >>> d->filename[bit_pos], > >>> le16_to_cpu(de->name_len)); > >>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > >>> static struct f2fs_dir_entry *find_in_level(struct inode *dir, > >>> unsigned int level, > >>> const struct f2fs_filename *fname, > >>> - struct page **res_page) > >>> + struct page **res_page, > >>> + bool use_hash) > >>> { > >>> int s = GET_DENTRY_SLOTS(fname->disk_name.len); > >>> unsigned int nbucket, nblock; > >>> - unsigned int bidx, end_block; > >>> + unsigned int bidx, end_block, bucket_no; > >>> struct page *dentry_page; > >>> struct f2fs_dir_entry *de = NULL; > >>> pgoff_t next_pgofs; > >>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > >>> nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); > >>> nblock = bucket_blocks(level); > >>> > >>> + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; > >>> + > >>> +start_find_bucket: > >>> bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, > >>> - le32_to_cpu(fname->hash) % nbucket); > >>> + bucket_no); > >>> end_block = bidx + nblock; > >>> > >>> while (bidx < end_block) { > >>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > >>> } > >>> } > >>> > >>> - de = find_in_block(dir, dentry_page, fname, &max_slots); > >>> + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); > >>> if (IS_ERR(de)) { > >>> *res_page = ERR_CAST(de); > >>> de = NULL; > >>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, > >>> bidx++; > >>> } > >>> > >>> + if (!use_hash && !de && ++bucket_no < nbucket) > >>> + goto start_find_bucket; > >>> + > >>> if (!de && room && F2FS_I(dir)->chash != fname->hash) { > >> > >> Do we need to avoid accessing or assigning hash if use_hash is false? > >> > > > > Is it still worth keeping the hash for the creation if both hash-based > > and linear searches failed? > > It needs to be kept for hash-based searching failure? since it was added to > enhance performance of file creation: > - open(..., O_CREAT) > - f2fs_lookup > : didn't find target dirent, then, record last chash & clevel > - f2fs_create > : create dirent in clevel bucket once chash matches > I understand your point and I will revise it accordingly. > Thanks, > > > > >> Thanks, > >> > >>> F2FS_I(dir)->chash = fname->hash; > >>> F2FS_I(dir)->clevel = level; > >>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > >>> struct f2fs_dir_entry *de = NULL; > >>> unsigned int max_depth; > >>> unsigned int level; > >>> + bool use_hash = true; > >>> > >>> *res_page = NULL; > >>> > >>> +start_find_entry: > >>> if (f2fs_has_inline_dentry(dir)) { > >>> - de = f2fs_find_in_inline_dir(dir, fname, res_page); > >>> + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); > >>> goto out; > >>> } > >>> > >>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, > >>> } > >>> > >>> for (level = 0; level < max_depth; level++) { > >>> - de = find_in_level(dir, level, fname, res_page); > >>> + de = find_in_level(dir, level, fname, res_page, use_hash); > >>> if (de || IS_ERR(*res_page)) > >>> break; > >>> } > >>> + > >>> out: > >>> +#if IS_ENABLED(CONFIG_UNICODE) > >>> + if (IS_CASEFOLDED(dir) && !de && use_hash) { > >>> + use_hash = false; > >>> + goto start_find_entry; > >>> + } > >>> +#endif > >>> /* This is to increase the speed of f2fs_create */ > >>> if (!de) > >>> F2FS_I(dir)->task = current; > >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > >>> index f523dd302bf6..1afebb9c4061 100644 > >>> --- a/fs/f2fs/f2fs.h > >>> +++ b/fs/f2fs/f2fs.h > >>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, > >>> struct f2fs_filename *fname); > >>> void f2fs_free_filename(struct f2fs_filename *fname); > >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, > >>> - const struct f2fs_filename *fname, int *max_slots); > >>> + const struct f2fs_filename *fname, int *max_slots, > >>> + bool use_hash); > >>> int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, > >>> unsigned int start_pos, struct fscrypt_str *fstr); > >>> void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, > >>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); > >>> int f2fs_recover_inline_data(struct inode *inode, struct page *npage); > >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > >>> const struct f2fs_filename *fname, > >>> - struct page **res_page); > >>> + struct page **res_page, > >>> + bool use_hash); > >>> int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, > >>> struct page *ipage); > >>> int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, > >>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c > >>> index cbd2a0d34804..3e3c35d4c98b 100644 > >>> --- a/fs/f2fs/inline.c > >>> +++ b/fs/f2fs/inline.c > >>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) > >>> > >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > >>> const struct f2fs_filename *fname, > >>> - struct page **res_page) > >>> + struct page **res_page, > >>> + bool use_hash) > >>> { > >>> struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); > >>> struct f2fs_dir_entry *de; > >>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, > >>> inline_dentry = inline_data_addr(dir, ipage); > >>> > >>> make_dentry_ptr_inline(dir, &d, inline_dentry); > >>> - de = f2fs_find_target_dentry(&d, fname, NULL); > >>> + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); > >>> unlock_page(ipage); > >>> if (IS_ERR(de)) { > >>> *res_page = ERR_CAST(de); > >> >
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 47a5c806cf16..a85d19b4e178 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct page *dentry_page, const struct f2fs_filename *fname, - int *max_slots) + int *max_slots, + bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots); + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } static inline int f2fs_match_name(const struct inode *dir, @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots) + const struct f2fs_filename *fname, int *max_slots, + bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash) { + if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, - struct page **res_page) + struct page **res_page, + bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; - unsigned int bidx, end_block; + unsigned int bidx, end_block, bucket_no; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; + +start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(fname->hash) % nbucket); + bucket_no); end_block = bidx + nblock; while (bidx < end_block) { @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_page, fname, &max_slots); + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); if (IS_ERR(de)) { *res_page = ERR_CAST(de); de = NULL; @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bidx++; } + if (!use_hash && !de && ++bucket_no < nbucket) + goto start_find_bucket; + if (!de && room && F2FS_I(dir)->chash != fname->hash) { F2FS_I(dir)->chash = fname->hash; F2FS_I(dir)->clevel = level; @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; + bool use_hash = true; *res_page = NULL; +start_find_entry: if (f2fs_has_inline_dentry(dir)) { - de = f2fs_find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); goto out; } @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, fname, res_page); + de = find_in_level(dir, level, fname, res_page, use_hash); if (de || IS_ERR(*res_page)) break; } + out: +#if IS_ENABLED(CONFIG_UNICODE) + if (IS_CASEFOLDED(dir) && !de && use_hash) { + use_hash = false; + goto start_find_entry; + } +#endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f523dd302bf6..1afebb9c4061 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname); void f2fs_free_filename(struct f2fs_filename *fname); struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots); + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page); + struct page **res_page, + bool use_hash); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cbd2a0d34804..3e3c35d4c98b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct page **res_page, + bool use_hash) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_dir_entry *de; @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(&d, fname, NULL); + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); unlock_page(ipage); if (IS_ERR(de)) { *res_page = ERR_CAST(de);
This patch addresses an issue where some files in case-insensitive directories become inaccessible due to changes in how the kernel function, utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points"). F2FS uses these case-folded names to calculate hash values for locating dentries and stores them on disk. Since utf8_casefold() can produce different output across kernel versions, stored hash values and newly calculated hash values may differ. This results in affected files no longer being found via the hash-based lookup. To resolve this, the patch introduces a linear search fallback. If the initial hash-based search fails, F2FS will sequentially scan the directory entries. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 Signed-off-by: Daniel Lee <chullee@google.com> --- fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/inline.c | 5 +++-- 3 files changed, 35 insertions(+), 14 deletions(-)