diff mbox series

[f2fs-dev,v5,01/10] fs: Expose helper to check if a directory needs casefolding

Message ID 20230812004146.30980-2-krisman@suse.de (mailing list archive)
State Superseded
Headers show
Series Support negative dentries on case-insensitive ext4 and f2fs | expand

Commit Message

Gabriel Krisman Bertazi Aug. 12, 2023, 12:41 a.m. UTC
In preparation to use it in ecryptfs, move needs_casefolding into a
public header and give it a namespaced name.

Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
 fs/libfs.c         | 14 ++------------
 include/linux/fs.h | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 12 deletions(-)

Comments

Eric Biggers Aug. 12, 2023, 1:59 a.m. UTC | #1
On Fri, Aug 11, 2023 at 08:41:37PM -0400, Gabriel Krisman Bertazi wrote:
> In preparation to use it in ecryptfs, move needs_casefolding into a
> public header and give it a namespaced name.
> 
> Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
> ---
>  fs/libfs.c         | 14 ++------------
>  include/linux/fs.h | 21 +++++++++++++++++++++
>  2 files changed, 23 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 5b851315eeed..8d0b64cfd5da 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -1381,16 +1381,6 @@ bool is_empty_dir_inode(struct inode *inode)
>  }
>  
>  #if IS_ENABLED(CONFIG_UNICODE)
> -/*
> - * Determine if the name of a dentry should be casefolded.
> - *
> - * Return: if names will need casefolding
> - */
> -static bool needs_casefold(const struct inode *dir)
> -{
> -	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
> -}
> -
>  /**
>   * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
>   * @dentry:	dentry whose name we are checking against
> @@ -1411,7 +1401,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
>  	char strbuf[DNAME_INLINE_LEN];
>  	int ret;
>  
> -	if (!dir || !needs_casefold(dir))
> +	if (!dir || !dir_is_casefolded(dir))
>  		goto fallback;
>  	/*
>  	 * If the dentry name is stored in-line, then it may be concurrently
> @@ -1453,7 +1443,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
>  	const struct unicode_map *um = sb->s_encoding;
>  	int ret = 0;
>  
> -	if (!dir || !needs_casefold(dir))
> +	if (!dir || !dir_is_casefolded(dir))
>  		return 0;
>  
>  	ret = utf8_casefold_hash(um, dentry, str);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6867512907d6..e3b631c6d24a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3213,6 +3213,27 @@ static inline bool dir_relax_shared(struct inode *inode)
>  	return !IS_DEADDIR(inode);
>  }
>  
> +/**
> + * dir_is_casefolded - Safely determine if filenames inside of a
> + * directory should be casefolded.
> + * @dir: The directory inode to be checked
> + *
> + * Filesystems should usually rely on this instead of checking the
> + * S_CASEFOLD flag directly when handling inodes, to avoid surprises
> + * with corrupted volumes.  Checking i_sb->s_encoding ensures the
> + * filesystem knows how to deal with case-insensitiveness.
> + *
> + * Return: if names will need casefolding
> + */
> +static inline bool dir_is_casefolded(const struct inode *dir)
> +{
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
> +#else
> +	return false;
> +#endif
> +}

To be honest I've always been confused about why the ->s_encoding check is
there.  It looks like Ted added it in 6456ca6520ab ("ext4: fix kernel oops
caused by spurious casefold flag") to address a fuzzing report for a filesystem
that had a casefolded directory but didn't have the casefold feature flag set.
It seems like an unnecessarily complex fix, though.  The filesystem should just
reject the inode earlier, in __ext4_iget().  And likewise for f2fs.  Then no
other code has to worry about this problem.

Actually, f2fs already does it, as I added it in commit f6322f3f1212:

        if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
                          __func__, inode->i_ino);
                return false;
        }

So just __ext4_iget() needs to be fixed.  I think we should consider doing that
before further entrenching all the extra ->s_encoding checks.

Also I don't understand why this needs to be part of your patch series anyway.
Shouldn't eCryptfs check IS_CASEFOLDED() anyway?

- Eric
Theodore Ts'o Aug. 12, 2023, 11:06 p.m. UTC | #2
On Fri, Aug 11, 2023 at 06:59:15PM -0700, Eric Biggers wrote:
> 
> To be honest I've always been confused about why the ->s_encoding check is
> there.  It looks like Ted added it in 6456ca6520ab ("ext4: fix kernel oops
> caused by spurious casefold flag") to address a fuzzing report for a filesystem
> that had a casefolded directory but didn't have the casefold feature flag set.
> It seems like an unnecessarily complex fix, though.  The filesystem should just
> reject the inode earlier, in __ext4_iget().  And likewise for f2fs.  Then no
> other code has to worry about this problem.

It's not enough to check it in ext4_iget, since the casefold flag can
get set *after* the inode has been fetched, but before you try to use
it.  This can happen because syzbot has opened the block device for
writing, and edits the superblock while it is mounted.

One could say that this is an insane threat model, but the syzbot team
thinks that this can be used to break out of a kernel lockdown after a
UEFI secure boot.  Which is fine, except I don't think I've been able
to get any company (including Google) to pay for headcount to fix
problems like this, and the unremitting stream of these sorts of
syzbot reports have already caused one major file system developer to
burn out and step down.

So problems like this get fixed on my own time, and when I have some
free time.  And if we "simplify" the code, it will inevitably cause
more syzbot reports, which I will then have to ignore, and the syzbot
team will write more "kernel security disaster" slide deck
presentations to senior VP's, although I'll note this has never
resulted in my getting any additional SWE's to help me fix the
problem...

> So just __ext4_iget() needs to be fixed.  I think we should consider doing that
> before further entrenching all the extra ->s_encoding checks.

If we can get an upstream kernel consensus that syzbot reports caused
by writing to a mounted file system aren't important, and we can
publish this somewhere where hopefully the syzbot team will pay
attention to it, sure...


						- Ted
Eric Biggers Aug. 13, 2023, 12:12 a.m. UTC | #3
On Sat, Aug 12, 2023 at 07:06:47PM -0400, Theodore Ts'o wrote:
> On Fri, Aug 11, 2023 at 06:59:15PM -0700, Eric Biggers wrote:
> > 
> > To be honest I've always been confused about why the ->s_encoding check is
> > there.  It looks like Ted added it in 6456ca6520ab ("ext4: fix kernel oops
> > caused by spurious casefold flag") to address a fuzzing report for a filesystem
> > that had a casefolded directory but didn't have the casefold feature flag set.
> > It seems like an unnecessarily complex fix, though.  The filesystem should just
> > reject the inode earlier, in __ext4_iget().  And likewise for f2fs.  Then no
> > other code has to worry about this problem.
> 
> the casefold flag can get set *after* the inode has been fetched, but before
> you try to use it.  This can happen because syzbot has opened the block device
> for writing, and edits the superblock while it is mounted.

I don't see how that is relevant here.

I think the actual problem you're hinting at is that checking the casefold
feature after the filesystem has been mounted is not guaranteed to work
properly, as ->s_encoding will be NULL if the casefold feature was not present
at mount time.  If we'd like to be robust in the event of the casefold feature
being concurrently enabled by a write to the block device, then all we need to
do is avoid checking the casefold feature after mount time, and instead check
->s_encoding.  I believe __ext4_iget() is still the only place it's needed.

> One could say that this is an insane threat model, but the syzbot team
> thinks that this can be used to break out of a kernel lockdown after a
> UEFI secure boot.  Which is fine, except I don't think I've been able
> to get any company (including Google) to pay for headcount to fix
> problems like this, and the unremitting stream of these sorts of
> syzbot reports have already caused one major file system developer to
> burn out and step down.
> 
> So problems like this get fixed on my own time, and when I have some
> free time.  And if we "simplify" the code, it will inevitably cause
> more syzbot reports, which I will then have to ignore, and the syzbot
> team will write more "kernel security disaster" slide deck
> presentations to senior VP's, although I'll note this has never
> resulted in my getting any additional SWE's to help me fix the
> problem...
> 
> > So just __ext4_iget() needs to be fixed.  I think we should consider doing that
> > before further entrenching all the extra ->s_encoding checks.
> 
> If we can get an upstream kernel consensus that syzbot reports caused
> by writing to a mounted file system aren't important, and we can
> publish this somewhere where hopefully the syzbot team will pay
> attention to it, sure...

But, more generally, I think it's clear that concurrent writes to the block
device's page cache is not something that filesystems can be robust against.  I
think this needs to be solved by providing an option to forbid this, as Jan
Kara's patchset "block: Add config option to not allow writing to mounted
devices" does, and then transitioning legacy use cases to new APIs.

Yes, "transitioning legacy use cases" will be a lot of work.  And if The Linux
Filesystem Maintainers(TM) do not have time for it, that's the way it is.
Someone who cares about it (such as someone who actually cares about the
potential impact on the Lockdown feature) will need to come along and do it.

But I think that should be the plan, and The Linux Filesystem Maintainers(TM) do
not need to try to play whack-a-mole with "fixing" filesystem code to be
consistently revalidating already-validated cached metadata.

- Eric
Matthew Wilcox Aug. 13, 2023, 3:08 a.m. UTC | #4
On Sat, Aug 12, 2023 at 07:06:47PM -0400, Theodore Ts'o wrote:
> One could say that this is an insane threat model, but the syzbot team
> thinks that this can be used to break out of a kernel lockdown after a
> UEFI secure boot.  Which is fine, except I don't think I've been able
> to get any company (including Google) to pay for headcount to fix
> problems like this, and the unremitting stream of these sorts of
> syzbot reports have already caused one major file system developer to
> burn out and step down.
> 
> So problems like this get fixed on my own time, and when I have some
> free time.  And if we "simplify" the code, it will inevitably cause
> more syzbot reports, which I will then have to ignore, and the syzbot
> team will write more "kernel security disaster" slide deck
> presentations to senior VP's, although I'll note this has never
> resulted in my getting any additional SWE's to help me fix the
> problem...
> 
> > So just __ext4_iget() needs to be fixed.  I think we should consider doing that
> > before further entrenching all the extra ->s_encoding checks.
> 
> If we can get an upstream kernel consensus that syzbot reports caused
> by writing to a mounted file system aren't important, and we can
> publish this somewhere where hopefully the syzbot team will pay
> attention to it, sure...

What the syzbot team don't seem to understand is that more bug reports
aren't better.  I used to investigate one most days, but the onslaught is
relentless and I just ignore syzbot reports now.  I appreciate maintainers
don't necessarily get that privilege.

They seem motivated to find new and exciting ways to break the kernel
without realising that they're sapping the will to work on the bugs that
we already have.

Hm.  Maybe this is a topic for kernel-summit?
Eric Biggers Aug. 13, 2023, 4:30 a.m. UTC | #5
On Sun, Aug 13, 2023 at 04:08:58AM +0100, Matthew Wilcox wrote:
> On Sat, Aug 12, 2023 at 07:06:47PM -0400, Theodore Ts'o wrote:
> > One could say that this is an insane threat model, but the syzbot team
> > thinks that this can be used to break out of a kernel lockdown after a
> > UEFI secure boot.  Which is fine, except I don't think I've been able
> > to get any company (including Google) to pay for headcount to fix
> > problems like this, and the unremitting stream of these sorts of
> > syzbot reports have already caused one major file system developer to
> > burn out and step down.
> > 
> > So problems like this get fixed on my own time, and when I have some
> > free time.  And if we "simplify" the code, it will inevitably cause
> > more syzbot reports, which I will then have to ignore, and the syzbot
> > team will write more "kernel security disaster" slide deck
> > presentations to senior VP's, although I'll note this has never
> > resulted in my getting any additional SWE's to help me fix the
> > problem...
> > 
> > > So just __ext4_iget() needs to be fixed.  I think we should consider doing that
> > > before further entrenching all the extra ->s_encoding checks.
> > 
> > If we can get an upstream kernel consensus that syzbot reports caused
> > by writing to a mounted file system aren't important, and we can
> > publish this somewhere where hopefully the syzbot team will pay
> > attention to it, sure...
> 
> What the syzbot team don't seem to understand is that more bug reports
> aren't better.  I used to investigate one most days, but the onslaught is
> relentless and I just ignore syzbot reports now.  I appreciate maintainers
> don't necessarily get that privilege.
> 
> They seem motivated to find new and exciting ways to break the kernel
> without realising that they're sapping the will to work on the bugs that
> we already have.
> 

Well, one thing that the kernel community can do to make things better is
identify when a large number of bug reports are caused by a single issue
("userspace can write to mounted block devices"), and do something about that
underlying issue (https://lore.kernel.org/r/20230704122727.17096-1-jack@suse.cz)
instead of trying to "fix" large numbers of individual "bugs".  We can have 1000
bugs or 1 bug, it is actually our choice in this case.

- Eric
Theodore Ts'o Aug. 14, 2023, 11:38 a.m. UTC | #6
On Sat, Aug 12, 2023 at 09:30:22PM -0700, Eric Biggers wrote:
> Well, one thing that the kernel community can do to make things better is
> identify when a large number of bug reports are caused by a single issue
> ("userspace can write to mounted block devices"), and do something about that
> underlying issue (https://lore.kernel.org/r/20230704122727.17096-1-jack@suse.cz)
> instead of trying to "fix" large numbers of individual "bugs".  We can have 1000
> bugs or 1 bug, it is actually our choice in this case.

That's assuming the syzbot folks are willing to enable the config in
Jan's patch.  The syzbot folks refused to enable it, unless the config
was gated on CONFIG_INSECURE, which I object to, because that's
presuming a threat model that we have not all agreed is valid.

Or rather, if it *is* valid some community members (or cough, cough,
**companies**) need to step up and supply patches.  As the saying
goes, "patches gratefully accepted".  It is *not* the maintainer's
responsibility to grant every single person whining about a feature
request, or even a bug fix.

       	       	       	       		   	  - Ted
Gabriel Krisman Bertazi Aug. 14, 2023, 3:02 p.m. UTC | #7
Eric Biggers <ebiggers@kernel.org> writes:

> On Fri, Aug 11, 2023 at 08:41:37PM -0400, Gabriel Krisman Bertazi wrote:
>> In preparation to use it in ecryptfs, move needs_casefolding into a
>> public header and give it a namespaced name.
>> 
>> Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
>> ---
>>  fs/libfs.c         | 14 ++------------
>>  include/linux/fs.h | 21 +++++++++++++++++++++
>>  2 files changed, 23 insertions(+), 12 deletions(-)
>> 
>> diff --git a/fs/libfs.c b/fs/libfs.c
>> index 5b851315eeed..8d0b64cfd5da 100644
>> --- a/fs/libfs.c
>> +++ b/fs/libfs.c
>> @@ -1381,16 +1381,6 @@ bool is_empty_dir_inode(struct inode *inode)
>>  }
>>  
>>  #if IS_ENABLED(CONFIG_UNICODE)
>> -/*
>> - * Determine if the name of a dentry should be casefolded.
>> - *
>> - * Return: if names will need casefolding
>> - */
>> -static bool needs_casefold(const struct inode *dir)
>> -{
>> -	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
>> -}
>> -
>>  /**
>>   * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
>>   * @dentry:	dentry whose name we are checking against
>> @@ -1411,7 +1401,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
>>  	char strbuf[DNAME_INLINE_LEN];
>>  	int ret;
>>  
>> -	if (!dir || !needs_casefold(dir))
>> +	if (!dir || !dir_is_casefolded(dir))
>>  		goto fallback;
>>  	/*
>>  	 * If the dentry name is stored in-line, then it may be concurrently
>> @@ -1453,7 +1443,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
>>  	const struct unicode_map *um = sb->s_encoding;
>>  	int ret = 0;
>>  
>> -	if (!dir || !needs_casefold(dir))
>> +	if (!dir || !dir_is_casefolded(dir))
>>  		return 0;
>>  
>>  	ret = utf8_casefold_hash(um, dentry, str);
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 6867512907d6..e3b631c6d24a 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -3213,6 +3213,27 @@ static inline bool dir_relax_shared(struct inode *inode)
>>  	return !IS_DEADDIR(inode);
>>  }
>>  
>> +/**
>> + * dir_is_casefolded - Safely determine if filenames inside of a
>> + * directory should be casefolded.
>> + * @dir: The directory inode to be checked
>> + *
>> + * Filesystems should usually rely on this instead of checking the
>> + * S_CASEFOLD flag directly when handling inodes, to avoid surprises
>> + * with corrupted volumes.  Checking i_sb->s_encoding ensures the
>> + * filesystem knows how to deal with case-insensitiveness.
>> + *
>> + * Return: if names will need casefolding
>> + */
>> +static inline bool dir_is_casefolded(const struct inode *dir)
>> +{
>> +#if IS_ENABLED(CONFIG_UNICODE)
>> +	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
>> +#else
>> +	return false;
>> +#endif
>> +}
>
> To be honest I've always been confused about why the ->s_encoding check is
> there.  It looks like Ted added it in 6456ca6520ab ("ext4: fix kernel oops
> caused by spurious casefold flag") to address a fuzzing report for a filesystem
> that had a casefolded directory but didn't have the casefold feature flag set.
> It seems like an unnecessarily complex fix, though.  The filesystem should just
> reject the inode earlier, in __ext4_iget().  And likewise for f2fs.  Then no
> other code has to worry about this problem.
>
> Actually, f2fs already does it, as I added it in commit f6322f3f1212:
>
>         if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
>                 set_sbi_flag(sbi, SBI_NEED_FSCK);
>                 f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
>                           __func__, inode->i_ino);
>                 return false;
>         }
>
> So just __ext4_iget() needs to be fixed.  I think we should consider doing that
> before further entrenching all the extra ->s_encoding checks.
>
> Also I don't understand why this needs to be part of your patch series anyway.
> Shouldn't eCryptfs check IS_CASEFOLDED() anyway?

While I agree with Ted's point about how this change is tiny compared to
the benefit of preventing casefold-related superblock corruptions on
runtime (and we want to keep it being done in ext4),  I also agree with
you that we don't need to check it also in ecryptfs.  But, I'll preserve
it in d_revalidate, since it is what we are currently doing in
d_compare/d_hash.

Also, this patchset has been sitting for years before the latest
discussions, and I'm tired of it, so I'm happy to keep this
discussion for another time.  Will drop this patch and just check
IS_CASEFOLDED in ecryptfs for the next iteration.

I'll follow up with another case-insensitive cleanup patchset I've been
siting on forever, which includes this patch and will restart this
discussion, among others.
Eric Biggers Aug. 14, 2023, 5:22 p.m. UTC | #8
On Mon, Aug 14, 2023 at 07:38:52AM -0400, Theodore Ts'o wrote:
> On Sat, Aug 12, 2023 at 09:30:22PM -0700, Eric Biggers wrote:
> > Well, one thing that the kernel community can do to make things better is
> > identify when a large number of bug reports are caused by a single issue
> > ("userspace can write to mounted block devices"), and do something about that
> > underlying issue (https://lore.kernel.org/r/20230704122727.17096-1-jack@suse.cz)
> > instead of trying to "fix" large numbers of individual "bugs".  We can have 1000
> > bugs or 1 bug, it is actually our choice in this case.
> 
> That's assuming the syzbot folks are willing to enable the config in
> Jan's patch.  The syzbot folks refused to enable it, unless the config
> was gated on CONFIG_INSECURE, which I object to, because that's
> presuming a threat model that we have not all agreed is valid.
> 
> Or rather, if it *is* valid some community members (or cough, cough,
> **companies**) need to step up and supply patches.  As the saying
> goes, "patches gratefully accepted".  It is *not* the maintainer's
> responsibility to grant every single person whining about a feature
> request, or even a bug fix.

They did disable CONFIG_XFS_SUPPORT_V4.  Yes, there was some pushback, but they
are very understandably (and correctly) concerned about ending up in a situation
where buggy code is disabled for syzkaller but enabled for everyone else.  They
eventually did accept the proposal to disable CONFIG_XFS_SUPPORT_V4, for reasons
including the fact that there is a concrete deprecation plan.  (FWIW, the XFS
maintainers had also pushed back strongly when I suggested adding a kconfig
option for XFS v4 support back in 2018.  Sometimes these things just take time.)

Anyway, syzkaller is just the messenger that is reminding us of a problem.  The
actual problem here is that "make filesystems robust against concurrent changes
to block device's page cache" is effectively unsolvable.  *Every* memory access
to the cache would need to use READ_ONCE/WRITE_ONCE, and *every* re-read of
every field would need to be fully re-validated.  PageChecked would need to go
away for metadata, as it would be invalid to cache the checked status at all.
There's basically zero chance of getting this correct; filesystems struggle to
validate even the metadata read from disk correctly, so how are they going to
successfully continually revalidate all cached metadata in memory?

I don't understand why we would waste time trying to do that instead of focusing
on an actual solution.  Sure, probably The Linux Filesystem Maintainers(TM)
don't have time to help with migrating legacy use cases of writing to mounted
block devices, but that just means that someone needs to step up to do it.  It
doesn't mean that they need to instead waste time on pointless "fixes".

Keep in mind, the syzkaller team isn't asking for these pointless "fixes"
either.  They'd very much prefer 1 fix to 1000 fixes.  I think some confusion
might be arising from the very different types of problems that syzkaller finds.
Sometimes 1 syzkaller report == 1 bug == 1 high-priority "must fix" bug == 1
vulnerability == 1 fix needed.  But in general syzkaller is just letting kernel
developers know about a problem, and it is up to them to decide what to do about
it.  In this case there is one underlying issue that needs to be fixed, and the
individual syzkaller reports that result from that issue are not important.

- Eric
Eric Biggers Aug. 14, 2023, 7:14 p.m. UTC | #9
On Mon, Aug 14, 2023 at 11:02:38AM -0400, Gabriel Krisman Bertazi wrote:
> 
> Also, this patchset has been sitting for years before the latest
> discussions, and I'm tired of it, so I'm happy to keep this
> discussion for another time.  Will drop this patch and just check
> IS_CASEFOLDED in ecryptfs for the next iteration.
> 
> I'll follow up with another case-insensitive cleanup patchset I've been
> siting on forever, which includes this patch and will restart this
> discussion, among others.
> 

Well, as we know unfortunately filesystem developers are in short supply, and
doing proper reviews (identifying issues and working closely with the patchset
author over multiple iterations to address them, as opposed to just slapping on
a Reviewed-by) is very time consuming.  Earlier this year I tried to get the
Android Systems team, which is ostensibly responsible for Android's use of
casefolding, to take a look, but their entire feedback was just "looks good to
me".  Also, the fact that this patchset originally excluded the casefold+encrypt
case technically made it not applicable to Android, and discouraged me from
taking a look since encryption is my focus.  Sorry for not taking a look sooner.

Anyway, thanks for doing this, and I think it's near the finish line now.  Once
you address the latest feedback and get a couple acks, I think that Christian
should take this through the VFS tree.  BTW, in my opinion, as the maintainer of
the "Unicode subsystem" you are also authorized to send a pull request for this
to Linus yourself.  But VFS does seem ideal in this case, given the diffstat,
and Christian has been fairly active with taking patches.

- Eric
Gabriel Krisman Bertazi Aug. 14, 2023, 7:26 p.m. UTC | #10
Eric Biggers <ebiggers@kernel.org> writes:

> On Mon, Aug 14, 2023 at 11:02:38AM -0400, Gabriel Krisman Bertazi wrote:
>> 
>> Also, this patchset has been sitting for years before the latest
>> discussions, and I'm tired of it, so I'm happy to keep this
>> discussion for another time.  Will drop this patch and just check
>> IS_CASEFOLDED in ecryptfs for the next iteration.
>> 
>> I'll follow up with another case-insensitive cleanup patchset I've been
>> siting on forever, which includes this patch and will restart this
>> discussion, among others.
>> 
>
> Well, as we know unfortunately filesystem developers are in short supply, and
> doing proper reviews (identifying issues and working closely with the patchset
> author over multiple iterations to address them, as opposed to just slapping on
> a Reviewed-by) is very time consuming.  Earlier this year I tried to get the
> Android Systems team, which is ostensibly responsible for Android's use of
> casefolding, to take a look, but their entire feedback was just "looks good to
> me".  Also, the fact that this patchset originally excluded the casefold+encrypt
> case technically made it not applicable to Android, and discouraged me from
> taking a look since encryption is my focus.  Sorry for not taking a look sooner.
>
> Anyway, thanks for doing this, and I think it's near the finish line now.  Once

On the contrary, thank *you* for your review. I always appreciate your
feedback, particularly since you are always able to find the corner
cases I missed. That, and your responsiveness, are the reasons I always
put you in my --cc list since v1 for anything related to unicode/fs :)
Theodore Ts'o Aug. 15, 2023, 3:59 a.m. UTC | #11
On Mon, Aug 14, 2023 at 10:22:44AM -0700, Eric Biggers wrote:
> 
> Keep in mind, the syzkaller team isn't asking for these pointless "fixes"
> either.  They'd very much prefer 1 fix to 1000 fixes.  I think some confusion
> might be arising from the very different types of problems that syzkaller finds.
> Sometimes 1 syzkaller report == 1 bug == 1 high-priority "must fix" bug == 1
> vulnerability == 1 fix needed.  But in general syzkaller is just letting kernel
> developers know about a problem, and it is up to them to decide what to do about
> it.  In this case there is one underlying issue that needs to be fixed, and the
> individual syzkaller reports that result from that issue are not important.

... except that the Syzkaller folks have created slide decks talking
about "Linux kernel security disaster", blaming the entire community,
where they quote the number unresolved syzkaller reports, without the
kind of nuance that you are referring to.

There is also not a great way of categorizing syzkaller reports as
"requires maliciously fuzzed file system image", or "writing to
mounted file system" --- either manually, or (ideally) automatically,
since the syzbot test generators knows what they are doing.

And finally, the reality is even if someone where to fix the "one
underlying issue", the reality is that it will be ten years or so
before said fixed can be rolled out, since it requires changes in
userspace utilities, as well as rolled out kernels, and enterprise
distros are around for a decade; even community distros need to be
supported for at least 3-5 years.

Finally, it's not just "one underlying issue"; there are also
"maliciously fuzzed file systems", and working around those syzbot
reports can be quite painful, especially the ones that lead to lockdep
deadlock reports.  Many of these are spurious, caused by an inode
being used in two contexts, that can only happen in a badly corrupted
file system, and for which we've already signalled that the file
system is corrupted, so if you panic on error, it wouldn't deadlock.
(And if you deadlock, it's not _that_ much worse than panicing on a
maliciously fuzzed file system.)  And all of these bugs get counted,
one for each lockdep report variation (so there can be 3-4 per root
cause) as a "security bug" in the "Linux kernel security disaster"
statistics.

I might not mind the hyperbole if said slide decks asked for more
headcount.  But instead, they blame the "Linux upstream community" for
not fixing bugs, or treating the bugs seriously.   Sigh....

  	    	    	       	      	       - Ted
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index 5b851315eeed..8d0b64cfd5da 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1381,16 +1381,6 @@  bool is_empty_dir_inode(struct inode *inode)
 }
 
 #if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Determine if the name of a dentry should be casefolded.
- *
- * Return: if names will need casefolding
- */
-static bool needs_casefold(const struct inode *dir)
-{
-	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
-}
-
 /**
  * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
  * @dentry:	dentry whose name we are checking against
@@ -1411,7 +1401,7 @@  static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
 	char strbuf[DNAME_INLINE_LEN];
 	int ret;
 
-	if (!dir || !needs_casefold(dir))
+	if (!dir || !dir_is_casefolded(dir))
 		goto fallback;
 	/*
 	 * If the dentry name is stored in-line, then it may be concurrently
@@ -1453,7 +1443,7 @@  static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
 	const struct unicode_map *um = sb->s_encoding;
 	int ret = 0;
 
-	if (!dir || !needs_casefold(dir))
+	if (!dir || !dir_is_casefolded(dir))
 		return 0;
 
 	ret = utf8_casefold_hash(um, dentry, str);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..e3b631c6d24a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3213,6 +3213,27 @@  static inline bool dir_relax_shared(struct inode *inode)
 	return !IS_DEADDIR(inode);
 }
 
+/**
+ * dir_is_casefolded - Safely determine if filenames inside of a
+ * directory should be casefolded.
+ * @dir: The directory inode to be checked
+ *
+ * Filesystems should usually rely on this instead of checking the
+ * S_CASEFOLD flag directly when handling inodes, to avoid surprises
+ * with corrupted volumes.  Checking i_sb->s_encoding ensures the
+ * filesystem knows how to deal with case-insensitiveness.
+ *
+ * Return: if names will need casefolding
+ */
+static inline bool dir_is_casefolded(const struct inode *dir)
+{
+#if IS_ENABLED(CONFIG_UNICODE)
+	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
+#else
+	return false;
+#endif
+}
+
 extern bool path_noexec(const struct path *path);
 extern void inode_nohighmem(struct inode *inode);