diff mbox series

[4/8] vfs: Fold casefolding into vfs

Message ID 20191203051049.44573-5-drosen@google.com (mailing list archive)
State New, archived
Headers show
Series Support for Casefolding and Encryption | expand

Commit Message

Daniel Rosenberg Dec. 3, 2019, 5:10 a.m. UTC
Ext4 and F2fs are both using casefolding, and they, along with any other
filesystem that adds the feature, will be using identical dentry_ops.
Additionally, those dentry ops interfere with the dentry_ops required
for fscrypt once we add support for casefolding and encryption.
Moving this into the vfs removes code duplication as well as the
complication with encryption.

Currently this is pretty close to just moving the existing f2fs/ext4
code up a level into the vfs, although there is a lot of room for
improvement now.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/dcache.c        | 35 +++++++++++++++++++++++++++++++++++
 fs/namei.c         | 43 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h | 12 ++++++++++++
 3 files changed, 87 insertions(+), 3 deletions(-)

Comments

Gao Xiang Dec. 3, 2019, 7:41 a.m. UTC | #1
On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote:
> Ext4 and F2fs are both using casefolding, and they, along with any other
> filesystem that adds the feature, will be using identical dentry_ops.
> Additionally, those dentry ops interfere with the dentry_ops required
> for fscrypt once we add support for casefolding and encryption.
> Moving this into the vfs removes code duplication as well as the
> complication with encryption.
> 
> Currently this is pretty close to just moving the existing f2fs/ext4
> code up a level into the vfs, although there is a lot of room for
> improvement now.
> 
> Signed-off-by: Daniel Rosenberg <drosen@google.com>

I'm afraid that such vfs modification is unneeded.

Just a quick glance it seems just can be replaced by introducing some
.d_cmp, .d_hash helpers (or with little modification) and most non-Android
emulated storage files are not casefolded (even in Android).

"those dentry ops interfere with the dentry_ops required for fscrypt",
I don't think it's a real diffculty and it could be done with some
better approach instead.

Thanks,
Gao Xiang
Gabriel Krisman Bertazi Dec. 3, 2019, 7:31 p.m. UTC | #2
Daniel Rosenberg <drosen@google.com> writes:

> Ext4 and F2fs are both using casefolding, and they, along with any other
> filesystem that adds the feature, will be using identical dentry_ops.
> Additionally, those dentry ops interfere with the dentry_ops required
> for fscrypt once we add support for casefolding and encryption.
> Moving this into the vfs removes code duplication as well as the
> complication with encryption.
>
> Currently this is pretty close to just moving the existing f2fs/ext4
> code up a level into the vfs, although there is a lot of room for
> improvement now.
>
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
>  fs/dcache.c        | 35 +++++++++++++++++++++++++++++++++++
>  fs/namei.c         | 43 ++++++++++++++++++++++++++++++++++++++++---
>  include/linux/fs.h | 12 ++++++++++++
>  3 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index f7931b682a0d..575f3c2c3f77 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -32,6 +32,7 @@
>  #include <linux/bit_spinlock.h>
>  #include <linux/rculist_bl.h>
>  #include <linux/list_lru.h>
> +#include <linux/unicode.h>
>  #include "internal.h"
>  #include "mount.h"
>  
> @@ -228,6 +229,13 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
>  
>  #endif
>  
> +bool needs_casefold(const struct inode *dir)
> +{
> +	return IS_CASEFOLDED(dir) &&
> +			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
> +}
> +EXPORT_SYMBOL(needs_casefold);
> +
>  static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
>  {
>  	/*
> @@ -247,7 +255,19 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
>  	 * be no NUL in the ct/tcount data)
>  	 */
>  	const unsigned char *cs = READ_ONCE(dentry->d_name.name);
> +#ifdef CONFIG_UNICODE
> +	struct inode *parent = dentry->d_parent->d_inode;
>  
> +	if (unlikely(needs_casefold(parent))) {
> +		const struct qstr n1 = QSTR_INIT(cs, tcount);
> +		const struct qstr n2 = QSTR_INIT(ct, tcount);
> +		int result = utf8_strncasecmp(dentry->d_sb->s_encoding,
> +						&n1, &n2);
> +
> +		if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb))
> +			return result;
> +	}
> +#endif
>  	return dentry_string_cmp(cs, ct, tcount);
>  }
>  
> @@ -2404,7 +2424,22 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
>  	 * calculate the standard hash first, as the d_op->d_hash()
>  	 * routine may choose to leave the hash value unchanged.
>  	 */
> +#ifdef CONFIG_UNICODE
> +	unsigned char *hname = NULL;
> +	int hlen = name->len;
> +
> +	if (IS_CASEFOLDED(dir->d_inode)) {
> +		hname = kmalloc(PATH_MAX, GFP_ATOMIC);
> +		if (!hname)
> +			return ERR_PTR(-ENOMEM);
> +		hlen = utf8_casefold(dir->d_sb->s_encoding,
> +					name, hname, PATH_MAX);
> +	}
> +	name->hash = full_name_hash(dir, hname ?: name->name, hlen);
> +	kfree(hname);
> +#else
>  	name->hash = full_name_hash(dir, name->name, name->len);
> +#endif
>  	if (dir->d_flags & DCACHE_OP_HASH) {
>  		int err = dir->d_op->d_hash(dir, name);
>  		if (unlikely(err < 0))
> diff --git a/fs/namei.c b/fs/namei.c
> index 2dda552bcf7a..b8d5cb0994ec 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -39,6 +39,7 @@
>  #include <linux/bitops.h>
>  #include <linux/init_task.h>
>  #include <linux/uaccess.h>
> +#include <linux/unicode.h>
>  
>  #include "internal.h"
>  #include "mount.h"
> @@ -2062,6 +2063,10 @@ static inline u64 hash_name(const void *salt, const char *name)
>  static int link_path_walk(const char *name, struct nameidata *nd)
>  {
>  	int err;
> +#ifdef CONFIG_UNICODE
> +	char *hname = NULL;
> +	int hlen = 0;
> +#endif
>  
>  	if (IS_ERR(name))
>  		return PTR_ERR(name);
> @@ -2078,9 +2083,21 @@ static int link_path_walk(const char *name, struct nameidata *nd)
>  		err = may_lookup(nd);
>  		if (err)
>  			return err;
> -
> +#ifdef CONFIG_UNICODE
> +		if (needs_casefold(nd->path.dentry->d_inode)) {
> +			struct qstr str = QSTR_INIT(name, PATH_MAX);
> +
> +			hname = kmalloc(PATH_MAX, GFP_ATOMIC);
> +			if (!hname)
> +				return -ENOMEM;
> +			hlen = utf8_casefold(nd->path.dentry->d_sb->s_encoding,
> +						&str, hname, PATH_MAX);
> +		}
> +		hash_len = hash_name(nd->path.dentry, hname ?: name);
> +		kfree(hname);

It would be nice to reuse the memory allocation for the entire path walk
instead of allocating and freeing several times in a row.  Still not
ideal, but I don't see how we could not have at least one allocation here.

> +#else
>  		hash_len = hash_name(nd->path.dentry, name);
> -
> +#endif
>  		type = LAST_NORM;
>  		if (name[0] == '.') switch (hashlen_len(hash_len)) {
>  			case 2:
> @@ -2452,9 +2469,29 @@ EXPORT_SYMBOL(vfs_path_lookup);
>  static int lookup_one_len_common(const char *name, struct dentry *base,
>  				 int len, struct qstr *this)
>  {
> +#ifdef CONFIG_UNICODE
> +	char *hname = NULL;
> +	int hlen = len;
> +
> +	if (needs_casefold(base->d_inode)) {
> +		struct qstr str = QSTR_INIT(name, len);
> +
> +		hname = kmalloc(PATH_MAX, GFP_ATOMIC);
> +		if (!hname)
> +			return -ENOMEM;
> +		hlen = utf8_casefold(base->d_sb->s_encoding,
> +					&str, hname, PATH_MAX);
> +	}
> +	this->hash = full_name_hash(base, hname ?: name, hlen);
> +	kvfree(hname);
> +#else
> +	this->hash = full_name_hash(base, name, len);
> +#endif
>  	this->name = name;
>  	this->len = len;
> -	this->hash = full_name_hash(base, name, len);
> +#ifdef CONFIG_UNICODE
> +	kfree(hname);
> +#endif
>  	if (!len)
>  		return -EACCES;
>  
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index c159a8bdee8b..38d1c20f3e6f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1382,6 +1382,12 @@ extern int send_sigurg(struct fown_struct *fown);
>  #define SB_ACTIVE	(1<<30)
>  #define SB_NOUSER	(1<<31)
>  
> +/* These flags relate to encoding and casefolding */
> +#define SB_ENC_STRICT_MODE_FL	(1 << 0)
> +
> +#define sb_has_enc_strict_mode(sb) \
> +	(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
> +
>  /*
>   *	Umount options
>   */
> @@ -1449,6 +1455,10 @@ struct super_block {
>  #endif
>  #ifdef CONFIG_FS_VERITY
>  	const struct fsverity_operations *s_vop;
> +#endif
> +#ifdef CONFIG_UNICODE
> +	struct unicode_map *s_encoding;
> +	__u16 s_encoding_flags;
>  #endif
>  	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
>  	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
> @@ -2044,6 +2054,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
>  #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
>  				 (inode)->i_rdev == WHITEOUT_DEV)
>  
> +extern bool needs_casefold(const struct inode *dir);
> +
>  static inline bool HAS_UNMAPPED_ID(struct inode *inode)
>  {
>  	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
Gabriel Krisman Bertazi Dec. 3, 2019, 7:42 p.m. UTC | #3
Gao Xiang <gaoxiang25@huawei.com> writes:

> On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote:
>> Ext4 and F2fs are both using casefolding, and they, along with any other
>> filesystem that adds the feature, will be using identical dentry_ops.
>> Additionally, those dentry ops interfere with the dentry_ops required
>> for fscrypt once we add support for casefolding and encryption.
>> Moving this into the vfs removes code duplication as well as the
>> complication with encryption.
>> 
>> Currently this is pretty close to just moving the existing f2fs/ext4
>> code up a level into the vfs, although there is a lot of room for
>> improvement now.
>> 
>> Signed-off-by: Daniel Rosenberg <drosen@google.com>
>
> I'm afraid that such vfs modification is unneeded.
>
> Just a quick glance it seems just can be replaced by introducing some
> .d_cmp, .d_hash helpers (or with little modification) and most non-Android
> emulated storage files are not casefolded (even in Android).
>
> "those dentry ops interfere with the dentry_ops required for fscrypt",
> I don't think it's a real diffculty and it could be done with some
> better approach instead.

It would be good to avoid dentry_ops in general for these cases.  It
doesn't just interfere with fscrypt, but also overlayfs and others.

The difficulty is that it is not trivial to change dentry_ops after
dentries are already installed in the dcache.  Which means that it is
hard to use different dentry_ops for different parts of the filesystem,
for instance when converting a directory to case-insensitive or back
to case-sensitive.

In fact, currently and for case-insensitive at least, we install generic
hooks for the entire case-insensitive filesystem and use it even for
!IS_CASEFOLDED() directories. This breaks overlayfs even if we don't
have a single IS_CASEFOLDED() directory at all, just by having the
superblock flag, we *must* set the dentry_ops, which already breaks
overlayfs.

I think Daniel's approach of moving this into VFS is the simplest way to
actually solve the issue, instead of extending and duplicating a lot of
functionality into filesystem hooks to support the possible mixes of
case-insensitive, overlayfs and fscrypt.
Eric Biggers Dec. 3, 2019, 8:34 p.m. UTC | #4
On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote:
> Gao Xiang <gaoxiang25@huawei.com> writes:
> 
> > On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote:
> >> Ext4 and F2fs are both using casefolding, and they, along with any other
> >> filesystem that adds the feature, will be using identical dentry_ops.
> >> Additionally, those dentry ops interfere with the dentry_ops required
> >> for fscrypt once we add support for casefolding and encryption.
> >> Moving this into the vfs removes code duplication as well as the
> >> complication with encryption.
> >> 
> >> Currently this is pretty close to just moving the existing f2fs/ext4
> >> code up a level into the vfs, although there is a lot of room for
> >> improvement now.
> >> 
> >> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> >
> > I'm afraid that such vfs modification is unneeded.
> >
> > Just a quick glance it seems just can be replaced by introducing some
> > .d_cmp, .d_hash helpers (or with little modification) and most non-Android
> > emulated storage files are not casefolded (even in Android).
> >
> > "those dentry ops interfere with the dentry_ops required for fscrypt",
> > I don't think it's a real diffculty and it could be done with some
> > better approach instead.
> 
> It would be good to avoid dentry_ops in general for these cases.  It
> doesn't just interfere with fscrypt, but also overlayfs and others.
> 
> The difficulty is that it is not trivial to change dentry_ops after
> dentries are already installed in the dcache.  Which means that it is
> hard to use different dentry_ops for different parts of the filesystem,
> for instance when converting a directory to case-insensitive or back
> to case-sensitive.
> 
> In fact, currently and for case-insensitive at least, we install generic
> hooks for the entire case-insensitive filesystem and use it even for
> !IS_CASEFOLDED() directories. This breaks overlayfs even if we don't
> have a single IS_CASEFOLDED() directory at all, just by having the
> superblock flag, we *must* set the dentry_ops, which already breaks
> overlayfs.
> 
> I think Daniel's approach of moving this into VFS is the simplest way to
> actually solve the issue, instead of extending and duplicating a lot of
> functionality into filesystem hooks to support the possible mixes of
> case-insensitive, overlayfs and fscrypt.
> 

I think we can actually get everything we want using dentry_operations only,
since the filesystem can set ->d_op during ->lookup() (like what is done for
encrypted filenames now) rather than at dentry allocation time.  And fs/crypto/
can export fscrypt_d_revalidate() rather than setting ->d_op itself.

See the untested patch below.

It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and
encrypt+casefold separately -- and this will need to be duplicated for each
filesystem.  But we do have to weigh that against adding additional complexity
and overhead to the VFS for everyone.  If we do go with the VFS changes, please
try to make them as simple and unobtrusive as possible.

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 3719efa546c6..cfa44adff2b3 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
  * Validate dentries in encrypted directories to make sure we aren't potentially
  * caching stale dentries after a key has been added.
  */
-static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
+int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *dir;
 	int err;
@@ -329,10 +329,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 	return valid;
 }
-
-const struct dentry_operations fscrypt_d_ops = {
-	.d_revalidate = fscrypt_d_revalidate,
-};
+EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
 
 /**
  * fscrypt_initialize() - allocate major buffers for fs encryption.
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 130b50e5a011..4420670ac40a 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode,
 			       unsigned int len, unsigned int offs,
 			       gfp_t gfp_flags);
 extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
-extern const struct dentry_operations fscrypt_d_ops;
 
 extern void __printf(3, 4) __cold
 fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...);
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bb3b7fcfdd48..ec81b6a597aa 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -116,7 +116,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
 		spin_unlock(&dentry->d_lock);
-		d_set_d_op(dentry, &fscrypt_d_ops);
 	}
 	return err;
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9fdd2b269d61..bd3c14e6b24a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -704,9 +704,47 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
 	kfree(norm);
 	return ret;
 }
+#endif /* !CONFIG_UNICODE */
 
-const struct dentry_operations ext4_dentry_ops = {
+#ifdef CONFIG_UNICODE
+static const struct dentry_operations ext4_ci_dentry_ops = {
+	.d_hash = ext4_d_hash,
+	.d_compare = ext4_d_compare,
+};
+#endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations ext4_encrypted_dentry_ops = {
+	.d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static const struct dentry_operations ext4_encrypted_ci_dentry_ops = {
 	.d_hash = ext4_d_hash,
 	.d_compare = ext4_d_compare,
+	.d_revalidate = fscrypt_d_revalidate,
 };
 #endif
+
+void ext4_set_d_ops(struct inode *dir, struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+	if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
+#ifdef CONFIG_UNICODE
+		if (IS_CASEFOLDED(dir)) {
+			d_set_d_op(dentry, &ext4_encrypted_ci_dentry_ops);
+			return;
+		}
+#endif
+		d_set_d_op(dentry, &ext4_encrypted_dentry_ops);
+		return;
+	}
+#endif
+#ifdef CONFIG_UNICODE
+	if (IS_CASEFOLDED(dir)) {
+		d_set_d_op(dentry, &ext4_ci_dentry_ops);
+		return;
+	}
+#endif
+}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f8578caba40d..00a10015a53c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2499,6 +2499,8 @@ static inline  unsigned char get_dtype(struct super_block *sb, int filetype)
 extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
 			     void *buf, int buf_size);
 
+void ext4_set_d_ops(struct inode *dir, struct dentry *dentry);
+
 /* fsync.c */
 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
 
@@ -3097,10 +3099,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a856997d87b5..4df1d074b393 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1608,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
 	struct buffer_head *bh;
 
 	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+	ext4_set_d_ops(dir, dentry);
 	if (err == -ENOENT)
 		return NULL;
 	if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1d82b56d9b11..ac593e9af270 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4498,11 +4498,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount4;
 	}
 
-#ifdef CONFIG_UNICODE
-	if (sbi->s_encoding)
-		sb->s_d_op = &ext4_dentry_ops;
-#endif
-
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 1a7bffe78ed5..0de461f2225a 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -120,6 +120,8 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
 
 extern void fscrypt_free_bounce_page(struct page *bounce_page);
 
+extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
+
 /* policy.c */
 extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
 extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
Gabriel Krisman Bertazi Dec. 3, 2019, 9:21 p.m. UTC | #5
Eric Biggers <ebiggers@kernel.org> writes:

> On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote:
>> Gao Xiang <gaoxiang25@huawei.com> writes:

>> I think Daniel's approach of moving this into VFS is the simplest way to
>> actually solve the issue, instead of extending and duplicating a lot of
>> functionality into filesystem hooks to support the possible mixes of
>> case-insensitive, overlayfs and fscrypt.
>> 
>
> I think we can actually get everything we want using dentry_operations only,
> since the filesystem can set ->d_op during ->lookup() (like what is done for
> encrypted filenames now) rather than at dentry allocation time.  And fs/crypto/
> can export fscrypt_d_revalidate() rather than setting ->d_op itself.

Problem is, differently from fscrypt, case-insensitive uses the d_hash()
hook and for a lookup, we actually use
dentry->d_parent->d_ops->d_hash().  Which works well, until you are flipping the
casefold flag.  Then the dentry already exists and you need to modify
the d_ops on the fly, which I couldn't find precedent anywhere.  I tried
invalidating the dentry whenever we flip the flag, but then if it has
negative dentries as children,I wasn't able to reliably invalidate it,
and that's when I reached the limit of my knowledge in VFS.  In
particular, in every attempt I made to implement it like this, I was
able to race and do a case-insensitive lookup on a directory that was
just made case sensitive.

I'm not saying there isn't a way.  But it is a bit harder than this
proposal. I tried it already and still didn't manage to make it work.
Maybe someone who better understands vfs.

> It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and
> encrypt+casefold separately -- and this will need to be duplicated for each
> filesystem.  But we do have to weigh that against adding additional complexity
> and overhead to the VFS for everyone.  If we do go with the VFS changes, please
> try to make them as simple and unobtrusive as possible.

Well, it is just not case-insensitive+fscrypt. Also overlayfs
there. Probably more.  So we have much more cases.  I understand the VFS
changes need to be very well thought, but when I worked on this it
started to look a more correct solution than using the hooks.

> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index 3719efa546c6..cfa44adff2b3 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -290,7 +290,7 @@ EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
>   * Validate dentries in encrypted directories to make sure we aren't potentially
>   * caching stale dentries after a key has been added.
>   */
> -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
> +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
>  {
>  	struct dentry *dir;
>  	int err;
> @@ -329,10 +329,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
>  
>  	return valid;
>  }
> -
> -const struct dentry_operations fscrypt_d_ops = {
> -	.d_revalidate = fscrypt_d_revalidate,
> -};
> +EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
>  
>  /**
>   * fscrypt_initialize() - allocate major buffers for fs encryption.
> diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
> index 130b50e5a011..4420670ac40a 100644
> --- a/fs/crypto/fscrypt_private.h
> +++ b/fs/crypto/fscrypt_private.h
> @@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode,
>  			       unsigned int len, unsigned int offs,
>  			       gfp_t gfp_flags);
>  extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
> -extern const struct dentry_operations fscrypt_d_ops;
>  
>  extern void __printf(3, 4) __cold
>  fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...);
> diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
> index bb3b7fcfdd48..ec81b6a597aa 100644
> --- a/fs/crypto/hooks.c
> +++ b/fs/crypto/hooks.c
> @@ -116,7 +116,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
>  		spin_lock(&dentry->d_lock);
>  		dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
>  		spin_unlock(&dentry->d_lock);
> -		d_set_d_op(dentry, &fscrypt_d_ops);
>  	}
>  	return err;
>  }
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index 9fdd2b269d61..bd3c14e6b24a 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -704,9 +704,47 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
>  	kfree(norm);
>  	return ret;
>  }
> +#endif /* !CONFIG_UNICODE */
>  
> -const struct dentry_operations ext4_dentry_ops = {
> +#ifdef CONFIG_UNICODE
> +static const struct dentry_operations ext4_ci_dentry_ops = {
> +	.d_hash = ext4_d_hash,
> +	.d_compare = ext4_d_compare,
> +};
> +#endif
> +
> +#ifdef CONFIG_FS_ENCRYPTION
> +static const struct dentry_operations ext4_encrypted_dentry_ops = {
> +	.d_revalidate = fscrypt_d_revalidate,
> +};
> +#endif
> +
> +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
> +static const struct dentry_operations ext4_encrypted_ci_dentry_ops = {
>  	.d_hash = ext4_d_hash,
>  	.d_compare = ext4_d_compare,
> +	.d_revalidate = fscrypt_d_revalidate,
>  };
>  #endif
> +
> +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry)
> +{
> +#ifdef CONFIG_FS_ENCRYPTION
> +	if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
> +#ifdef CONFIG_UNICODE
> +		if (IS_CASEFOLDED(dir)) {
> +			d_set_d_op(dentry, &ext4_encrypted_ci_dentry_ops);
> +			return;
> +		}
> +#endif
> +		d_set_d_op(dentry, &ext4_encrypted_dentry_ops);
> +		return;
> +	}
> +#endif
> +#ifdef CONFIG_UNICODE
> +	if (IS_CASEFOLDED(dir)) {
> +		d_set_d_op(dentry, &ext4_ci_dentry_ops);
> +		return;
> +	}
> +#endif
> +}
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index f8578caba40d..00a10015a53c 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2499,6 +2499,8 @@ static inline  unsigned char get_dtype(struct super_block *sb, int filetype)
>  extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
>  			     void *buf, int buf_size);
>  
> +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry);
> +
>  /* fsync.c */
>  extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
>  
> @@ -3097,10 +3099,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
>  /* dir.c */
>  extern const struct file_operations ext4_dir_operations;
>  
> -#ifdef CONFIG_UNICODE
> -extern const struct dentry_operations ext4_dentry_ops;
> -#endif
> -
>  /* file.c */
>  extern const struct inode_operations ext4_file_inode_operations;
>  extern const struct file_operations ext4_file_operations;
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index a856997d87b5..4df1d074b393 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -1608,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
>  	struct buffer_head *bh;
>  
>  	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
> +	ext4_set_d_ops(dir, dentry);
>  	if (err == -ENOENT)
>  		return NULL;
>  	if (err)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 1d82b56d9b11..ac593e9af270 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4498,11 +4498,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  		goto failed_mount4;
>  	}
>  
> -#ifdef CONFIG_UNICODE
> -	if (sbi->s_encoding)
> -		sb->s_d_op = &ext4_dentry_ops;
> -#endif
> -
>  	sb->s_root = d_make_root(root);
>  	if (!sb->s_root) {
>  		ext4_msg(sb, KERN_ERR, "get root dentry failed");
> diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
> index 1a7bffe78ed5..0de461f2225a 100644
> --- a/include/linux/fscrypt.h
> +++ b/include/linux/fscrypt.h
> @@ -120,6 +120,8 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
>  
>  extern void fscrypt_free_bounce_page(struct page *bounce_page);
>  
> +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
> +
>  /* policy.c */
>  extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
>  extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
Eric Biggers Dec. 4, 2019, 12:32 a.m. UTC | #6
On Tue, Dec 03, 2019 at 04:21:02PM -0500, Gabriel Krisman Bertazi wrote:
> Eric Biggers <ebiggers@kernel.org> writes:
> 
> > On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote:
> >> Gao Xiang <gaoxiang25@huawei.com> writes:
> 
> >> I think Daniel's approach of moving this into VFS is the simplest way to
> >> actually solve the issue, instead of extending and duplicating a lot of
> >> functionality into filesystem hooks to support the possible mixes of
> >> case-insensitive, overlayfs and fscrypt.
> >> 
> >
> > I think we can actually get everything we want using dentry_operations only,
> > since the filesystem can set ->d_op during ->lookup() (like what is done for
> > encrypted filenames now) rather than at dentry allocation time.  And fs/crypto/
> > can export fscrypt_d_revalidate() rather than setting ->d_op itself.
> 
> Problem is, differently from fscrypt, case-insensitive uses the d_hash()
> hook and for a lookup, we actually use
> dentry->d_parent->d_ops->d_hash().  Which works well, until you are flipping the
> casefold flag.  Then the dentry already exists and you need to modify
> the d_ops on the fly, which I couldn't find precedent anywhere.  I tried
> invalidating the dentry whenever we flip the flag, but then if it has
> negative dentries as children,I wasn't able to reliably invalidate it,
> and that's when I reached the limit of my knowledge in VFS.  In
> particular, in every attempt I made to implement it like this, I was
> able to race and do a case-insensitive lookup on a directory that was
> just made case sensitive.
> 
> I'm not saying there isn't a way.  But it is a bit harder than this
> proposal. I tried it already and still didn't manage to make it work.
> Maybe someone who better understands vfs.

Yes you're right, I forgot that for ->d_hash() and ->d_compare() it's actually
the parent's directory dentry_operations that are used.

> 
> > It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and
> > encrypt+casefold separately -- and this will need to be duplicated for each
> > filesystem.  But we do have to weigh that against adding additional complexity
> > and overhead to the VFS for everyone.  If we do go with the VFS changes, please
> > try to make them as simple and unobtrusive as possible.
> 
> Well, it is just not case-insensitive+fscrypt. Also overlayfs
> there. Probably more.  So we have much more cases.  I understand the VFS
> changes need to be very well thought, but when I worked on this it
> started to look a more correct solution than using the hooks.

Well the point of my proof-of-concept patch having separate ext4_ci_dentry_ops,
ext4_encrypted_dentry_ops, and ext4_encrypted_ci_dentry_ops is supposed to be
for overlayfs support -- since overlayfs requires that some operations are not
present.  If we didn't need overlayfs support, we could just use a single
ext4_dentry_ops for all dentries instead.

I think we could still support fscrypt, casefold, fscrypt+casefold, and
fscrypt+overlayfs with dentry_operations only.  It's casefold+overlayfs that's
the biggest problem, due to the possibility of the casefold flag being set on a
directory later as you pointed out.

- Eric
Theodore Ts'o Jan. 3, 2020, 8:26 p.m. UTC | #7
On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote:
> @@ -228,6 +229,13 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
>  
>  #endif
>  
> +bool needs_casefold(const struct inode *dir)
> +{
> +	return IS_CASEFOLDED(dir) &&
> +			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
> +}
> +EXPORT_SYMBOL(needs_casefold);
> +

I'd suggest adding a check to make sure that dir->i_sb->s_encoding is
non-NULL before needs_casefold() returns non-NULL.  Otherwise a bug
(or a fuzzed file system) which manages to set the S_CASEFOLD flag without having s_encoding be initialized might cause a NULL dereference.

Also, maybe make needs_casefold() an inline function which returns 0
if CONFIG_UNICODE is not defined?  That way the need for #ifdef
CONFIG_UNICODE could be reduced.

> @@ -247,7 +255,19 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
>  	 * be no NUL in the ct/tcount data)
>  	 */
>  	const unsigned char *cs = READ_ONCE(dentry->d_name.name);
> +#ifdef CONFIG_UNICODE
> +	struct inode *parent = dentry->d_parent->d_inode;
>  
> +	if (unlikely(needs_casefold(parent))) {
> +		const struct qstr n1 = QSTR_INIT(cs, tcount);
> +		const struct qstr n2 = QSTR_INIT(ct, tcount);
> +		int result = utf8_strncasecmp(dentry->d_sb->s_encoding,
> +						&n1, &n2);
> +
> +		if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb))
> +			return result;
> +	}
> +#endif

This is an example of how we could drop the #ifdef CONFIG_UNICODE
(moving the declaration of 'parent' into the #if statement) if
needs_casefold() always returns 0 if !defined(CONFIG_UNICODE).

> @@ -2404,7 +2424,22 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
>  	 * calculate the standard hash first, as the d_op->d_hash()
>  	 * routine may choose to leave the hash value unchanged.
>  	 */
> +#ifdef CONFIG_UNICODE
> +	unsigned char *hname = NULL;
> +	int hlen = name->len;
> +
> +	if (IS_CASEFOLDED(dir->d_inode)) {
> +		hname = kmalloc(PATH_MAX, GFP_ATOMIC);
> +		if (!hname)
> +			return ERR_PTR(-ENOMEM);
> +		hlen = utf8_casefold(dir->d_sb->s_encoding,
> +					name, hname, PATH_MAX);
> +	}
> +	name->hash = full_name_hash(dir, hname ?: name->name, hlen);
> +	kfree(hname);
> +#else
>  	name->hash = full_name_hash(dir, name->name, name->len);
> +#endif

Perhaps this could be refactored out?  It's also used in
link_path_walk() and lookup_one_len_common().

(Note, there was some strageness in lookup_one_len_common(), where
hname is freed twice, the first time using kvfree() which I don't
believe is needed.  But this can be fixed as part of the refactoring.)

	   	    	     	    	  - Ted
diff mbox series

Patch

diff --git a/fs/dcache.c b/fs/dcache.c
index f7931b682a0d..575f3c2c3f77 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@ 
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
 #include <linux/list_lru.h>
+#include <linux/unicode.h>
 #include "internal.h"
 #include "mount.h"
 
@@ -228,6 +229,13 @@  static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 
 #endif
 
+bool needs_casefold(const struct inode *dir)
+{
+	return IS_CASEFOLDED(dir) &&
+			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
+}
+EXPORT_SYMBOL(needs_casefold);
+
 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 {
 	/*
@@ -247,7 +255,19 @@  static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
 	 * be no NUL in the ct/tcount data)
 	 */
 	const unsigned char *cs = READ_ONCE(dentry->d_name.name);
+#ifdef CONFIG_UNICODE
+	struct inode *parent = dentry->d_parent->d_inode;
 
+	if (unlikely(needs_casefold(parent))) {
+		const struct qstr n1 = QSTR_INIT(cs, tcount);
+		const struct qstr n2 = QSTR_INIT(ct, tcount);
+		int result = utf8_strncasecmp(dentry->d_sb->s_encoding,
+						&n1, &n2);
+
+		if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb))
+			return result;
+	}
+#endif
 	return dentry_string_cmp(cs, ct, tcount);
 }
 
@@ -2404,7 +2424,22 @@  struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
 	 * calculate the standard hash first, as the d_op->d_hash()
 	 * routine may choose to leave the hash value unchanged.
 	 */
+#ifdef CONFIG_UNICODE
+	unsigned char *hname = NULL;
+	int hlen = name->len;
+
+	if (IS_CASEFOLDED(dir->d_inode)) {
+		hname = kmalloc(PATH_MAX, GFP_ATOMIC);
+		if (!hname)
+			return ERR_PTR(-ENOMEM);
+		hlen = utf8_casefold(dir->d_sb->s_encoding,
+					name, hname, PATH_MAX);
+	}
+	name->hash = full_name_hash(dir, hname ?: name->name, hlen);
+	kfree(hname);
+#else
 	name->hash = full_name_hash(dir, name->name, name->len);
+#endif
 	if (dir->d_flags & DCACHE_OP_HASH) {
 		int err = dir->d_op->d_hash(dir, name);
 		if (unlikely(err < 0))
diff --git a/fs/namei.c b/fs/namei.c
index 2dda552bcf7a..b8d5cb0994ec 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -39,6 +39,7 @@ 
 #include <linux/bitops.h>
 #include <linux/init_task.h>
 #include <linux/uaccess.h>
+#include <linux/unicode.h>
 
 #include "internal.h"
 #include "mount.h"
@@ -2062,6 +2063,10 @@  static inline u64 hash_name(const void *salt, const char *name)
 static int link_path_walk(const char *name, struct nameidata *nd)
 {
 	int err;
+#ifdef CONFIG_UNICODE
+	char *hname = NULL;
+	int hlen = 0;
+#endif
 
 	if (IS_ERR(name))
 		return PTR_ERR(name);
@@ -2078,9 +2083,21 @@  static int link_path_walk(const char *name, struct nameidata *nd)
 		err = may_lookup(nd);
 		if (err)
 			return err;
-
+#ifdef CONFIG_UNICODE
+		if (needs_casefold(nd->path.dentry->d_inode)) {
+			struct qstr str = QSTR_INIT(name, PATH_MAX);
+
+			hname = kmalloc(PATH_MAX, GFP_ATOMIC);
+			if (!hname)
+				return -ENOMEM;
+			hlen = utf8_casefold(nd->path.dentry->d_sb->s_encoding,
+						&str, hname, PATH_MAX);
+		}
+		hash_len = hash_name(nd->path.dentry, hname ?: name);
+		kfree(hname);
+#else
 		hash_len = hash_name(nd->path.dentry, name);
-
+#endif
 		type = LAST_NORM;
 		if (name[0] == '.') switch (hashlen_len(hash_len)) {
 			case 2:
@@ -2452,9 +2469,29 @@  EXPORT_SYMBOL(vfs_path_lookup);
 static int lookup_one_len_common(const char *name, struct dentry *base,
 				 int len, struct qstr *this)
 {
+#ifdef CONFIG_UNICODE
+	char *hname = NULL;
+	int hlen = len;
+
+	if (needs_casefold(base->d_inode)) {
+		struct qstr str = QSTR_INIT(name, len);
+
+		hname = kmalloc(PATH_MAX, GFP_ATOMIC);
+		if (!hname)
+			return -ENOMEM;
+		hlen = utf8_casefold(base->d_sb->s_encoding,
+					&str, hname, PATH_MAX);
+	}
+	this->hash = full_name_hash(base, hname ?: name, hlen);
+	kvfree(hname);
+#else
+	this->hash = full_name_hash(base, name, len);
+#endif
 	this->name = name;
 	this->len = len;
-	this->hash = full_name_hash(base, name, len);
+#ifdef CONFIG_UNICODE
+	kfree(hname);
+#endif
 	if (!len)
 		return -EACCES;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c159a8bdee8b..38d1c20f3e6f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1382,6 +1382,12 @@  extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE	(1<<30)
 #define SB_NOUSER	(1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL	(1 << 0)
+
+#define sb_has_enc_strict_mode(sb) \
+	(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  *	Umount options
  */
@@ -1449,6 +1455,10 @@  struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
 	const struct fsverity_operations *s_vop;
+#endif
+#ifdef CONFIG_UNICODE
+	struct unicode_map *s_encoding;
+	__u16 s_encoding_flags;
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
@@ -2044,6 +2054,8 @@  static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
 #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
 				 (inode)->i_rdev == WHITEOUT_DEV)
 
+extern bool needs_casefold(const struct inode *dir);
+
 static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 {
 	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);