Message ID | 20191203051049.44573-5-drosen@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support for Casefolding and Encryption | expand |
On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote: > Ext4 and F2fs are both using casefolding, and they, along with any other > filesystem that adds the feature, will be using identical dentry_ops. > Additionally, those dentry ops interfere with the dentry_ops required > for fscrypt once we add support for casefolding and encryption. > Moving this into the vfs removes code duplication as well as the > complication with encryption. > > Currently this is pretty close to just moving the existing f2fs/ext4 > code up a level into the vfs, although there is a lot of room for > improvement now. > > Signed-off-by: Daniel Rosenberg <drosen@google.com> I'm afraid that such vfs modification is unneeded. Just a quick glance it seems just can be replaced by introducing some .d_cmp, .d_hash helpers (or with little modification) and most non-Android emulated storage files are not casefolded (even in Android). "those dentry ops interfere with the dentry_ops required for fscrypt", I don't think it's a real diffculty and it could be done with some better approach instead. Thanks, Gao Xiang
Daniel Rosenberg <drosen@google.com> writes: > Ext4 and F2fs are both using casefolding, and they, along with any other > filesystem that adds the feature, will be using identical dentry_ops. > Additionally, those dentry ops interfere with the dentry_ops required > for fscrypt once we add support for casefolding and encryption. > Moving this into the vfs removes code duplication as well as the > complication with encryption. > > Currently this is pretty close to just moving the existing f2fs/ext4 > code up a level into the vfs, although there is a lot of room for > improvement now. > > Signed-off-by: Daniel Rosenberg <drosen@google.com> > --- > fs/dcache.c | 35 +++++++++++++++++++++++++++++++++++ > fs/namei.c | 43 ++++++++++++++++++++++++++++++++++++++++--- > include/linux/fs.h | 12 ++++++++++++ > 3 files changed, 87 insertions(+), 3 deletions(-) > > diff --git a/fs/dcache.c b/fs/dcache.c > index f7931b682a0d..575f3c2c3f77 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -32,6 +32,7 @@ > #include <linux/bit_spinlock.h> > #include <linux/rculist_bl.h> > #include <linux/list_lru.h> > +#include <linux/unicode.h> > #include "internal.h" > #include "mount.h" > > @@ -228,6 +229,13 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char > > #endif > > +bool needs_casefold(const struct inode *dir) > +{ > + return IS_CASEFOLDED(dir) && > + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); > +} > +EXPORT_SYMBOL(needs_casefold); > + > static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) > { > /* > @@ -247,7 +255,19 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c > * be no NUL in the ct/tcount data) > */ > const unsigned char *cs = READ_ONCE(dentry->d_name.name); > +#ifdef CONFIG_UNICODE > + struct inode *parent = dentry->d_parent->d_inode; > > + if (unlikely(needs_casefold(parent))) { > + const struct qstr n1 = QSTR_INIT(cs, tcount); > + const struct qstr n2 = QSTR_INIT(ct, tcount); > + int result = utf8_strncasecmp(dentry->d_sb->s_encoding, > + &n1, &n2); > + > + if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb)) > + return result; > + } > +#endif > return dentry_string_cmp(cs, ct, tcount); > } > > @@ -2404,7 +2424,22 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) > * calculate the standard hash first, as the d_op->d_hash() > * routine may choose to leave the hash value unchanged. > */ > +#ifdef CONFIG_UNICODE > + unsigned char *hname = NULL; > + int hlen = name->len; > + > + if (IS_CASEFOLDED(dir->d_inode)) { > + hname = kmalloc(PATH_MAX, GFP_ATOMIC); > + if (!hname) > + return ERR_PTR(-ENOMEM); > + hlen = utf8_casefold(dir->d_sb->s_encoding, > + name, hname, PATH_MAX); > + } > + name->hash = full_name_hash(dir, hname ?: name->name, hlen); > + kfree(hname); > +#else > name->hash = full_name_hash(dir, name->name, name->len); > +#endif > if (dir->d_flags & DCACHE_OP_HASH) { > int err = dir->d_op->d_hash(dir, name); > if (unlikely(err < 0)) > diff --git a/fs/namei.c b/fs/namei.c > index 2dda552bcf7a..b8d5cb0994ec 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -39,6 +39,7 @@ > #include <linux/bitops.h> > #include <linux/init_task.h> > #include <linux/uaccess.h> > +#include <linux/unicode.h> > > #include "internal.h" > #include "mount.h" > @@ -2062,6 +2063,10 @@ static inline u64 hash_name(const void *salt, const char *name) > static int link_path_walk(const char *name, struct nameidata *nd) > { > int err; > +#ifdef CONFIG_UNICODE > + char *hname = NULL; > + int hlen = 0; > +#endif > > if (IS_ERR(name)) > return PTR_ERR(name); > @@ -2078,9 +2083,21 @@ static int link_path_walk(const char *name, struct nameidata *nd) > err = may_lookup(nd); > if (err) > return err; > - > +#ifdef CONFIG_UNICODE > + if (needs_casefold(nd->path.dentry->d_inode)) { > + struct qstr str = QSTR_INIT(name, PATH_MAX); > + > + hname = kmalloc(PATH_MAX, GFP_ATOMIC); > + if (!hname) > + return -ENOMEM; > + hlen = utf8_casefold(nd->path.dentry->d_sb->s_encoding, > + &str, hname, PATH_MAX); > + } > + hash_len = hash_name(nd->path.dentry, hname ?: name); > + kfree(hname); It would be nice to reuse the memory allocation for the entire path walk instead of allocating and freeing several times in a row. Still not ideal, but I don't see how we could not have at least one allocation here. > +#else > hash_len = hash_name(nd->path.dentry, name); > - > +#endif > type = LAST_NORM; > if (name[0] == '.') switch (hashlen_len(hash_len)) { > case 2: > @@ -2452,9 +2469,29 @@ EXPORT_SYMBOL(vfs_path_lookup); > static int lookup_one_len_common(const char *name, struct dentry *base, > int len, struct qstr *this) > { > +#ifdef CONFIG_UNICODE > + char *hname = NULL; > + int hlen = len; > + > + if (needs_casefold(base->d_inode)) { > + struct qstr str = QSTR_INIT(name, len); > + > + hname = kmalloc(PATH_MAX, GFP_ATOMIC); > + if (!hname) > + return -ENOMEM; > + hlen = utf8_casefold(base->d_sb->s_encoding, > + &str, hname, PATH_MAX); > + } > + this->hash = full_name_hash(base, hname ?: name, hlen); > + kvfree(hname); > +#else > + this->hash = full_name_hash(base, name, len); > +#endif > this->name = name; > this->len = len; > - this->hash = full_name_hash(base, name, len); > +#ifdef CONFIG_UNICODE > + kfree(hname); > +#endif > if (!len) > return -EACCES; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index c159a8bdee8b..38d1c20f3e6f 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1382,6 +1382,12 @@ extern int send_sigurg(struct fown_struct *fown); > #define SB_ACTIVE (1<<30) > #define SB_NOUSER (1<<31) > > +/* These flags relate to encoding and casefolding */ > +#define SB_ENC_STRICT_MODE_FL (1 << 0) > + > +#define sb_has_enc_strict_mode(sb) \ > + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) > + > /* > * Umount options > */ > @@ -1449,6 +1455,10 @@ struct super_block { > #endif > #ifdef CONFIG_FS_VERITY > const struct fsverity_operations *s_vop; > +#endif > +#ifdef CONFIG_UNICODE > + struct unicode_map *s_encoding; > + __u16 s_encoding_flags; > #endif > struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ > struct list_head s_mounts; /* list of mounts; _not_ for fs use */ > @@ -2044,6 +2054,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags > #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ > (inode)->i_rdev == WHITEOUT_DEV) > > +extern bool needs_casefold(const struct inode *dir); > + > static inline bool HAS_UNMAPPED_ID(struct inode *inode) > { > return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
Gao Xiang <gaoxiang25@huawei.com> writes: > On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote: >> Ext4 and F2fs are both using casefolding, and they, along with any other >> filesystem that adds the feature, will be using identical dentry_ops. >> Additionally, those dentry ops interfere with the dentry_ops required >> for fscrypt once we add support for casefolding and encryption. >> Moving this into the vfs removes code duplication as well as the >> complication with encryption. >> >> Currently this is pretty close to just moving the existing f2fs/ext4 >> code up a level into the vfs, although there is a lot of room for >> improvement now. >> >> Signed-off-by: Daniel Rosenberg <drosen@google.com> > > I'm afraid that such vfs modification is unneeded. > > Just a quick glance it seems just can be replaced by introducing some > .d_cmp, .d_hash helpers (or with little modification) and most non-Android > emulated storage files are not casefolded (even in Android). > > "those dentry ops interfere with the dentry_ops required for fscrypt", > I don't think it's a real diffculty and it could be done with some > better approach instead. It would be good to avoid dentry_ops in general for these cases. It doesn't just interfere with fscrypt, but also overlayfs and others. The difficulty is that it is not trivial to change dentry_ops after dentries are already installed in the dcache. Which means that it is hard to use different dentry_ops for different parts of the filesystem, for instance when converting a directory to case-insensitive or back to case-sensitive. In fact, currently and for case-insensitive at least, we install generic hooks for the entire case-insensitive filesystem and use it even for !IS_CASEFOLDED() directories. This breaks overlayfs even if we don't have a single IS_CASEFOLDED() directory at all, just by having the superblock flag, we *must* set the dentry_ops, which already breaks overlayfs. I think Daniel's approach of moving this into VFS is the simplest way to actually solve the issue, instead of extending and duplicating a lot of functionality into filesystem hooks to support the possible mixes of case-insensitive, overlayfs and fscrypt.
On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote: > Gao Xiang <gaoxiang25@huawei.com> writes: > > > On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote: > >> Ext4 and F2fs are both using casefolding, and they, along with any other > >> filesystem that adds the feature, will be using identical dentry_ops. > >> Additionally, those dentry ops interfere with the dentry_ops required > >> for fscrypt once we add support for casefolding and encryption. > >> Moving this into the vfs removes code duplication as well as the > >> complication with encryption. > >> > >> Currently this is pretty close to just moving the existing f2fs/ext4 > >> code up a level into the vfs, although there is a lot of room for > >> improvement now. > >> > >> Signed-off-by: Daniel Rosenberg <drosen@google.com> > > > > I'm afraid that such vfs modification is unneeded. > > > > Just a quick glance it seems just can be replaced by introducing some > > .d_cmp, .d_hash helpers (or with little modification) and most non-Android > > emulated storage files are not casefolded (even in Android). > > > > "those dentry ops interfere with the dentry_ops required for fscrypt", > > I don't think it's a real diffculty and it could be done with some > > better approach instead. > > It would be good to avoid dentry_ops in general for these cases. It > doesn't just interfere with fscrypt, but also overlayfs and others. > > The difficulty is that it is not trivial to change dentry_ops after > dentries are already installed in the dcache. Which means that it is > hard to use different dentry_ops for different parts of the filesystem, > for instance when converting a directory to case-insensitive or back > to case-sensitive. > > In fact, currently and for case-insensitive at least, we install generic > hooks for the entire case-insensitive filesystem and use it even for > !IS_CASEFOLDED() directories. This breaks overlayfs even if we don't > have a single IS_CASEFOLDED() directory at all, just by having the > superblock flag, we *must* set the dentry_ops, which already breaks > overlayfs. > > I think Daniel's approach of moving this into VFS is the simplest way to > actually solve the issue, instead of extending and duplicating a lot of > functionality into filesystem hooks to support the possible mixes of > case-insensitive, overlayfs and fscrypt. > I think we can actually get everything we want using dentry_operations only, since the filesystem can set ->d_op during ->lookup() (like what is done for encrypted filenames now) rather than at dentry allocation time. And fs/crypto/ can export fscrypt_d_revalidate() rather than setting ->d_op itself. See the untested patch below. It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and encrypt+casefold separately -- and this will need to be duplicated for each filesystem. But we do have to weigh that against adding additional complexity and overhead to the VFS for everyone. If we do go with the VFS changes, please try to make them as simple and unobtrusive as possible. diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 3719efa546c6..cfa44adff2b3 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -290,7 +290,7 @@ EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. */ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; int err; @@ -329,10 +329,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return valid; } - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; +EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); /** * fscrypt_initialize() - allocate major buffers for fs encryption. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 130b50e5a011..4420670ac40a 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode, unsigned int len, unsigned int offs, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); -extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index bb3b7fcfdd48..ec81b6a597aa 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -116,7 +116,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); - d_set_d_op(dentry, &fscrypt_d_ops); } return err; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9fdd2b269d61..bd3c14e6b24a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -704,9 +704,47 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) kfree(norm); return ret; } +#endif /* !CONFIG_UNICODE */ -const struct dentry_operations ext4_dentry_ops = { +#ifdef CONFIG_UNICODE +static const struct dentry_operations ext4_ci_dentry_ops = { + .d_hash = ext4_d_hash, + .d_compare = ext4_d_compare, +}; +#endif + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations ext4_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) +static const struct dentry_operations ext4_encrypted_ci_dentry_ops = { .d_hash = ext4_d_hash, .d_compare = ext4_d_compare, + .d_revalidate = fscrypt_d_revalidate, }; #endif + +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { +#ifdef CONFIG_UNICODE + if (IS_CASEFOLDED(dir)) { + d_set_d_op(dentry, &ext4_encrypted_ci_dentry_ops); + return; + } +#endif + d_set_d_op(dentry, &ext4_encrypted_dentry_ops); + return; + } +#endif +#ifdef CONFIG_UNICODE + if (IS_CASEFOLDED(dir)) { + d_set_d_op(dentry, &ext4_ci_dentry_ops); + return; + } +#endif +} diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f8578caba40d..00a10015a53c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2499,6 +2499,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype) extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, int buf_size); +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry); + /* fsync.c */ extern int ext4_sync_file(struct file *, loff_t, loff_t, int); @@ -3097,10 +3099,6 @@ static inline void ext4_unlock_group(struct super_block *sb, /* dir.c */ extern const struct file_operations ext4_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations ext4_dentry_ops; -#endif - /* file.c */ extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a856997d87b5..4df1d074b393 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1608,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); + ext4_set_d_ops(dir, dentry); if (err == -ENOENT) return NULL; if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1d82b56d9b11..ac593e9af270 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4498,11 +4498,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } -#ifdef CONFIG_UNICODE - if (sbi->s_encoding) - sb->s_d_op = &ext4_dentry_ops; -#endif - sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 1a7bffe78ed5..0de461f2225a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -120,6 +120,8 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) extern void fscrypt_free_bounce_page(struct page *bounce_page); +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); + /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
Eric Biggers <ebiggers@kernel.org> writes: > On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote: >> Gao Xiang <gaoxiang25@huawei.com> writes: >> I think Daniel's approach of moving this into VFS is the simplest way to >> actually solve the issue, instead of extending and duplicating a lot of >> functionality into filesystem hooks to support the possible mixes of >> case-insensitive, overlayfs and fscrypt. >> > > I think we can actually get everything we want using dentry_operations only, > since the filesystem can set ->d_op during ->lookup() (like what is done for > encrypted filenames now) rather than at dentry allocation time. And fs/crypto/ > can export fscrypt_d_revalidate() rather than setting ->d_op itself. Problem is, differently from fscrypt, case-insensitive uses the d_hash() hook and for a lookup, we actually use dentry->d_parent->d_ops->d_hash(). Which works well, until you are flipping the casefold flag. Then the dentry already exists and you need to modify the d_ops on the fly, which I couldn't find precedent anywhere. I tried invalidating the dentry whenever we flip the flag, but then if it has negative dentries as children,I wasn't able to reliably invalidate it, and that's when I reached the limit of my knowledge in VFS. In particular, in every attempt I made to implement it like this, I was able to race and do a case-insensitive lookup on a directory that was just made case sensitive. I'm not saying there isn't a way. But it is a bit harder than this proposal. I tried it already and still didn't manage to make it work. Maybe someone who better understands vfs. > It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and > encrypt+casefold separately -- and this will need to be duplicated for each > filesystem. But we do have to weigh that against adding additional complexity > and overhead to the VFS for everyone. If we do go with the VFS changes, please > try to make them as simple and unobtrusive as possible. Well, it is just not case-insensitive+fscrypt. Also overlayfs there. Probably more. So we have much more cases. I understand the VFS changes need to be very well thought, but when I worked on this it started to look a more correct solution than using the hooks. > diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c > index 3719efa546c6..cfa44adff2b3 100644 > --- a/fs/crypto/crypto.c > +++ b/fs/crypto/crypto.c > @@ -290,7 +290,7 @@ EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); > * Validate dentries in encrypted directories to make sure we aren't potentially > * caching stale dentries after a key has been added. > */ > -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) > +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) > { > struct dentry *dir; > int err; > @@ -329,10 +329,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) > > return valid; > } > - > -const struct dentry_operations fscrypt_d_ops = { > - .d_revalidate = fscrypt_d_revalidate, > -}; > +EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); > > /** > * fscrypt_initialize() - allocate major buffers for fs encryption. > diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h > index 130b50e5a011..4420670ac40a 100644 > --- a/fs/crypto/fscrypt_private.h > +++ b/fs/crypto/fscrypt_private.h > @@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode, > unsigned int len, unsigned int offs, > gfp_t gfp_flags); > extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); > -extern const struct dentry_operations fscrypt_d_ops; > > extern void __printf(3, 4) __cold > fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); > diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c > index bb3b7fcfdd48..ec81b6a597aa 100644 > --- a/fs/crypto/hooks.c > +++ b/fs/crypto/hooks.c > @@ -116,7 +116,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, > spin_lock(&dentry->d_lock); > dentry->d_flags |= DCACHE_ENCRYPTED_NAME; > spin_unlock(&dentry->d_lock); > - d_set_d_op(dentry, &fscrypt_d_ops); > } > return err; > } > diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c > index 9fdd2b269d61..bd3c14e6b24a 100644 > --- a/fs/ext4/dir.c > +++ b/fs/ext4/dir.c > @@ -704,9 +704,47 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) > kfree(norm); > return ret; > } > +#endif /* !CONFIG_UNICODE */ > > -const struct dentry_operations ext4_dentry_ops = { > +#ifdef CONFIG_UNICODE > +static const struct dentry_operations ext4_ci_dentry_ops = { > + .d_hash = ext4_d_hash, > + .d_compare = ext4_d_compare, > +}; > +#endif > + > +#ifdef CONFIG_FS_ENCRYPTION > +static const struct dentry_operations ext4_encrypted_dentry_ops = { > + .d_revalidate = fscrypt_d_revalidate, > +}; > +#endif > + > +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) > +static const struct dentry_operations ext4_encrypted_ci_dentry_ops = { > .d_hash = ext4_d_hash, > .d_compare = ext4_d_compare, > + .d_revalidate = fscrypt_d_revalidate, > }; > #endif > + > +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry) > +{ > +#ifdef CONFIG_FS_ENCRYPTION > + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { > +#ifdef CONFIG_UNICODE > + if (IS_CASEFOLDED(dir)) { > + d_set_d_op(dentry, &ext4_encrypted_ci_dentry_ops); > + return; > + } > +#endif > + d_set_d_op(dentry, &ext4_encrypted_dentry_ops); > + return; > + } > +#endif > +#ifdef CONFIG_UNICODE > + if (IS_CASEFOLDED(dir)) { > + d_set_d_op(dentry, &ext4_ci_dentry_ops); > + return; > + } > +#endif > +} > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index f8578caba40d..00a10015a53c 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -2499,6 +2499,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype) > extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, > void *buf, int buf_size); > > +void ext4_set_d_ops(struct inode *dir, struct dentry *dentry); > + > /* fsync.c */ > extern int ext4_sync_file(struct file *, loff_t, loff_t, int); > > @@ -3097,10 +3099,6 @@ static inline void ext4_unlock_group(struct super_block *sb, > /* dir.c */ > extern const struct file_operations ext4_dir_operations; > > -#ifdef CONFIG_UNICODE > -extern const struct dentry_operations ext4_dentry_ops; > -#endif > - > /* file.c */ > extern const struct inode_operations ext4_file_inode_operations; > extern const struct file_operations ext4_file_operations; > diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c > index a856997d87b5..4df1d074b393 100644 > --- a/fs/ext4/namei.c > +++ b/fs/ext4/namei.c > @@ -1608,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, > struct buffer_head *bh; > > err = ext4_fname_prepare_lookup(dir, dentry, &fname); > + ext4_set_d_ops(dir, dentry); > if (err == -ENOENT) > return NULL; > if (err) > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 1d82b56d9b11..ac593e9af270 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -4498,11 +4498,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > goto failed_mount4; > } > > -#ifdef CONFIG_UNICODE > - if (sbi->s_encoding) > - sb->s_d_op = &ext4_dentry_ops; > -#endif > - > sb->s_root = d_make_root(root); > if (!sb->s_root) { > ext4_msg(sb, KERN_ERR, "get root dentry failed"); > diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h > index 1a7bffe78ed5..0de461f2225a 100644 > --- a/include/linux/fscrypt.h > +++ b/include/linux/fscrypt.h > @@ -120,6 +120,8 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) > > extern void fscrypt_free_bounce_page(struct page *bounce_page); > > +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); > + > /* policy.c */ > extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); > extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
On Tue, Dec 03, 2019 at 04:21:02PM -0500, Gabriel Krisman Bertazi wrote: > Eric Biggers <ebiggers@kernel.org> writes: > > > On Tue, Dec 03, 2019 at 02:42:10PM -0500, Gabriel Krisman Bertazi wrote: > >> Gao Xiang <gaoxiang25@huawei.com> writes: > > >> I think Daniel's approach of moving this into VFS is the simplest way to > >> actually solve the issue, instead of extending and duplicating a lot of > >> functionality into filesystem hooks to support the possible mixes of > >> case-insensitive, overlayfs and fscrypt. > >> > > > > I think we can actually get everything we want using dentry_operations only, > > since the filesystem can set ->d_op during ->lookup() (like what is done for > > encrypted filenames now) rather than at dentry allocation time. And fs/crypto/ > > can export fscrypt_d_revalidate() rather than setting ->d_op itself. > > Problem is, differently from fscrypt, case-insensitive uses the d_hash() > hook and for a lookup, we actually use > dentry->d_parent->d_ops->d_hash(). Which works well, until you are flipping the > casefold flag. Then the dentry already exists and you need to modify > the d_ops on the fly, which I couldn't find precedent anywhere. I tried > invalidating the dentry whenever we flip the flag, but then if it has > negative dentries as children,I wasn't able to reliably invalidate it, > and that's when I reached the limit of my knowledge in VFS. In > particular, in every attempt I made to implement it like this, I was > able to race and do a case-insensitive lookup on a directory that was > just made case sensitive. > > I'm not saying there isn't a way. But it is a bit harder than this > proposal. I tried it already and still didn't manage to make it work. > Maybe someone who better understands vfs. Yes you're right, I forgot that for ->d_hash() and ->d_compare() it's actually the parent's directory dentry_operations that are used. > > > It's definitely ugly to have to handle the 3 cases of encrypt, casefold, and > > encrypt+casefold separately -- and this will need to be duplicated for each > > filesystem. But we do have to weigh that against adding additional complexity > > and overhead to the VFS for everyone. If we do go with the VFS changes, please > > try to make them as simple and unobtrusive as possible. > > Well, it is just not case-insensitive+fscrypt. Also overlayfs > there. Probably more. So we have much more cases. I understand the VFS > changes need to be very well thought, but when I worked on this it > started to look a more correct solution than using the hooks. Well the point of my proof-of-concept patch having separate ext4_ci_dentry_ops, ext4_encrypted_dentry_ops, and ext4_encrypted_ci_dentry_ops is supposed to be for overlayfs support -- since overlayfs requires that some operations are not present. If we didn't need overlayfs support, we could just use a single ext4_dentry_ops for all dentries instead. I think we could still support fscrypt, casefold, fscrypt+casefold, and fscrypt+overlayfs with dentry_operations only. It's casefold+overlayfs that's the biggest problem, due to the possibility of the casefold flag being set on a directory later as you pointed out. - Eric
On Mon, Dec 02, 2019 at 09:10:45PM -0800, Daniel Rosenberg wrote: > @@ -228,6 +229,13 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char > > #endif > > +bool needs_casefold(const struct inode *dir) > +{ > + return IS_CASEFOLDED(dir) && > + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); > +} > +EXPORT_SYMBOL(needs_casefold); > + I'd suggest adding a check to make sure that dir->i_sb->s_encoding is non-NULL before needs_casefold() returns non-NULL. Otherwise a bug (or a fuzzed file system) which manages to set the S_CASEFOLD flag without having s_encoding be initialized might cause a NULL dereference. Also, maybe make needs_casefold() an inline function which returns 0 if CONFIG_UNICODE is not defined? That way the need for #ifdef CONFIG_UNICODE could be reduced. > @@ -247,7 +255,19 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c > * be no NUL in the ct/tcount data) > */ > const unsigned char *cs = READ_ONCE(dentry->d_name.name); > +#ifdef CONFIG_UNICODE > + struct inode *parent = dentry->d_parent->d_inode; > > + if (unlikely(needs_casefold(parent))) { > + const struct qstr n1 = QSTR_INIT(cs, tcount); > + const struct qstr n2 = QSTR_INIT(ct, tcount); > + int result = utf8_strncasecmp(dentry->d_sb->s_encoding, > + &n1, &n2); > + > + if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb)) > + return result; > + } > +#endif This is an example of how we could drop the #ifdef CONFIG_UNICODE (moving the declaration of 'parent' into the #if statement) if needs_casefold() always returns 0 if !defined(CONFIG_UNICODE). > @@ -2404,7 +2424,22 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) > * calculate the standard hash first, as the d_op->d_hash() > * routine may choose to leave the hash value unchanged. > */ > +#ifdef CONFIG_UNICODE > + unsigned char *hname = NULL; > + int hlen = name->len; > + > + if (IS_CASEFOLDED(dir->d_inode)) { > + hname = kmalloc(PATH_MAX, GFP_ATOMIC); > + if (!hname) > + return ERR_PTR(-ENOMEM); > + hlen = utf8_casefold(dir->d_sb->s_encoding, > + name, hname, PATH_MAX); > + } > + name->hash = full_name_hash(dir, hname ?: name->name, hlen); > + kfree(hname); > +#else > name->hash = full_name_hash(dir, name->name, name->len); > +#endif Perhaps this could be refactored out? It's also used in link_path_walk() and lookup_one_len_common(). (Note, there was some strageness in lookup_one_len_common(), where hname is freed twice, the first time using kvfree() which I don't believe is needed. But this can be fixed as part of the refactoring.) - Ted
diff --git a/fs/dcache.c b/fs/dcache.c index f7931b682a0d..575f3c2c3f77 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> #include <linux/list_lru.h> +#include <linux/unicode.h> #include "internal.h" #include "mount.h" @@ -228,6 +229,13 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char #endif +bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); +} +EXPORT_SYMBOL(needs_casefold); + static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { /* @@ -247,7 +255,19 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c * be no NUL in the ct/tcount data) */ const unsigned char *cs = READ_ONCE(dentry->d_name.name); +#ifdef CONFIG_UNICODE + struct inode *parent = dentry->d_parent->d_inode; + if (unlikely(needs_casefold(parent))) { + const struct qstr n1 = QSTR_INIT(cs, tcount); + const struct qstr n2 = QSTR_INIT(ct, tcount); + int result = utf8_strncasecmp(dentry->d_sb->s_encoding, + &n1, &n2); + + if (result >= 0 || sb_has_enc_strict_mode(dentry->d_sb)) + return result; + } +#endif return dentry_string_cmp(cs, ct, tcount); } @@ -2404,7 +2424,22 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * calculate the standard hash first, as the d_op->d_hash() * routine may choose to leave the hash value unchanged. */ +#ifdef CONFIG_UNICODE + unsigned char *hname = NULL; + int hlen = name->len; + + if (IS_CASEFOLDED(dir->d_inode)) { + hname = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!hname) + return ERR_PTR(-ENOMEM); + hlen = utf8_casefold(dir->d_sb->s_encoding, + name, hname, PATH_MAX); + } + name->hash = full_name_hash(dir, hname ?: name->name, hlen); + kfree(hname); +#else name->hash = full_name_hash(dir, name->name, name->len); +#endif if (dir->d_flags & DCACHE_OP_HASH) { int err = dir->d_op->d_hash(dir, name); if (unlikely(err < 0)) diff --git a/fs/namei.c b/fs/namei.c index 2dda552bcf7a..b8d5cb0994ec 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -39,6 +39,7 @@ #include <linux/bitops.h> #include <linux/init_task.h> #include <linux/uaccess.h> +#include <linux/unicode.h> #include "internal.h" #include "mount.h" @@ -2062,6 +2063,10 @@ static inline u64 hash_name(const void *salt, const char *name) static int link_path_walk(const char *name, struct nameidata *nd) { int err; +#ifdef CONFIG_UNICODE + char *hname = NULL; + int hlen = 0; +#endif if (IS_ERR(name)) return PTR_ERR(name); @@ -2078,9 +2083,21 @@ static int link_path_walk(const char *name, struct nameidata *nd) err = may_lookup(nd); if (err) return err; - +#ifdef CONFIG_UNICODE + if (needs_casefold(nd->path.dentry->d_inode)) { + struct qstr str = QSTR_INIT(name, PATH_MAX); + + hname = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!hname) + return -ENOMEM; + hlen = utf8_casefold(nd->path.dentry->d_sb->s_encoding, + &str, hname, PATH_MAX); + } + hash_len = hash_name(nd->path.dentry, hname ?: name); + kfree(hname); +#else hash_len = hash_name(nd->path.dentry, name); - +#endif type = LAST_NORM; if (name[0] == '.') switch (hashlen_len(hash_len)) { case 2: @@ -2452,9 +2469,29 @@ EXPORT_SYMBOL(vfs_path_lookup); static int lookup_one_len_common(const char *name, struct dentry *base, int len, struct qstr *this) { +#ifdef CONFIG_UNICODE + char *hname = NULL; + int hlen = len; + + if (needs_casefold(base->d_inode)) { + struct qstr str = QSTR_INIT(name, len); + + hname = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!hname) + return -ENOMEM; + hlen = utf8_casefold(base->d_sb->s_encoding, + &str, hname, PATH_MAX); + } + this->hash = full_name_hash(base, hname ?: name, hlen); + kvfree(hname); +#else + this->hash = full_name_hash(base, name, len); +#endif this->name = name; this->len = len; - this->hash = full_name_hash(base, name, len); +#ifdef CONFIG_UNICODE + kfree(hname); +#endif if (!len) return -EACCES; diff --git a/include/linux/fs.h b/include/linux/fs.h index c159a8bdee8b..38d1c20f3e6f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1382,6 +1382,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_enc_strict_mode(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1449,6 +1455,10 @@ struct super_block { #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; +#endif +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ @@ -2044,6 +2054,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +extern bool needs_casefold(const struct inode *dir); + static inline bool HAS_UNMAPPED_ID(struct inode *inode) { return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
Ext4 and F2fs are both using casefolding, and they, along with any other filesystem that adds the feature, will be using identical dentry_ops. Additionally, those dentry ops interfere with the dentry_ops required for fscrypt once we add support for casefolding and encryption. Moving this into the vfs removes code duplication as well as the complication with encryption. Currently this is pretty close to just moving the existing f2fs/ext4 code up a level into the vfs, although there is a lot of room for improvement now. Signed-off-by: Daniel Rosenberg <drosen@google.com> --- fs/dcache.c | 35 +++++++++++++++++++++++++++++++++++ fs/namei.c | 43 ++++++++++++++++++++++++++++++++++++++++--- include/linux/fs.h | 12 ++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-)