diff mbox series

[v4,07/10] tmpfs: Add casefold lookup support

Message ID 20240911144502.115260-8-andrealmeid@igalia.com (mailing list archive)
State New
Headers show
Series tmpfs: Add case-insensitive support for tmpfs | expand

Commit Message

André Almeida Sept. 11, 2024, 2:44 p.m. UTC
Enable casefold lookup in tmpfs, based on the encoding defined by
userspace. That means that instead of comparing byte per byte a file
name, it compares to a case-insensitive equivalent of the Unicode
string.

* Dcache handling

There's a special need when dealing with case-insensitive dentries.
First of all, we currently invalidated every negative casefold dentries.
That happens because currently VFS code has no proper support to deal
with that, giving that it could incorrectly reuse a previous filename
for a new file that has a casefold match. For instance, this could
happen:

$ mkdir DIR
$ rm -r DIR
$ mkdir dir
$ ls
DIR/

And would be perceived as inconsistency from userspace point of view,
because even that we match files in a case-insensitive manner, we still
honor whatever is the initial filename.

Along with that, tmpfs stores only the first equivalent name dentry used
in the dcache, preventing duplications of dentries in the dcache. The
d_compare() version for casefold files uses a normalized string, so the
filename under lookup will be compared to another normalized string for
the existing file, achieving a casefolded lookup.

* Enabling casefold via mount options

Most filesystems have their data stored in disk, so casefold option need
to be enabled when building a filesystem on a device (via mkfs).
However, as tmpfs is a RAM backed filesystem, there's no disk
information and thus no mkfs to store information about casefold.

For tmpfs, create casefold options for mounting. Userspace can then
enable casefold support for a mount point using:

$ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/

Userspace must set what Unicode standard is aiming to. The available
options depends on what the kernel Unicode subsystem supports.

And for strict encoding:

$ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/

Strict encoding means that tmpfs will refuse to create invalid UTF-8
sequences. When this option is not enabled, any invalid sequence will be
treated as an opaque byte sequence, ignoring the encoding thus not being
able to be looked up in a case-insensitive way.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
Changes from v3:
- Simplified shmem_parse_opt_casefold()
- sb->s_d_op is set to shmem_ci_dentry_ops during mount time
- got rid of shmem_lookup(), modified simple_lookup()

Changes from v2:
- simple_lookup() now sets d_ops
- reworked shmem_parse_opt_casefold()
- if `mount -o casefold` has no param, load latest UTF-8 version
- using (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir) when possible
---
 mm/shmem.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 115 insertions(+), 4 deletions(-)

Comments

Gabriel Krisman Bertazi Sept. 12, 2024, 7:04 p.m. UTC | #1
André Almeida <andrealmeid@igalia.com> writes:

> Enable casefold lookup in tmpfs, based on the encoding defined by
> userspace. That means that instead of comparing byte per byte a file
> name, it compares to a case-insensitive equivalent of the Unicode
> string.

I think this looks much better.

A few things left. First, I'd suggest you merge patch 5 and this
one. There is not much sense in keeping them separate; patch 5 is a
dependency that won't crash the build, but might cause runtime errors if anyone
cherry-picks the feature but forgets to pull it.

> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..4fde63596ab3 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -40,6 +40,8 @@
>  #include <linux/fs_parser.h>
>  #include <linux/swapfile.h>
>  #include <linux/iversion.h>
> +#include <linux/unicode.h>

> +#include <linux/parser.h>

I think you don't need this header anymore

>  #include "swap.h"
>  
>  static struct vfsmount *shm_mnt __ro_after_init;
> @@ -123,6 +125,8 @@ struct shmem_options {
>  	bool noswap;
>  	unsigned short quota_types;
>  	struct shmem_quota_limits qlimits;
> +	struct unicode_map *encoding;
> +	bool strict_encoding;
>  #define SHMEM_SEEN_BLOCKS 1
>  #define SHMEM_SEEN_INODES 2
>  #define SHMEM_SEEN_HUGE 4
> @@ -3427,6 +3431,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	if (IS_ERR(inode))
>  		return PTR_ERR(inode);
>  
> +	if (IS_ENABLED(CONFIG_UNICODE) &&
> +	    !generic_ci_validate_strict_name(dir, &dentry->d_name))
> +		return -EINVAL;
> +

Commenting here, but this is about generic_ci_validate_strict_name.  Can
you make it an inline function in the header file instead? This way you
can fold the IS_ENABLED into it and also avoid the function call.

>  	error = simple_acl_create(dir, inode);
>  	if (error)
>  		goto out_iput;
> @@ -3442,7 +3450,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
> +
>  	dget(dentry); /* Extra count - pin the dentry in core */
>  	return error;
>  
> @@ -3533,7 +3546,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
>  	inc_nlink(inode);
>  	ihold(inode);	/* New dentry reference */
>  	dget(dentry);	/* Extra pinning count for the created dentry */
> -	d_instantiate(dentry, inode);
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  out:
>  	return ret;
>  }
> @@ -3553,6 +3569,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
>  	inode_inc_iversion(dir);
>  	drop_nlink(inode);
>  	dput(dentry);	/* Undo the count from "create" - does all the work */
> +
> +	/*
> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
> +	 * we invalidate them
> +	 */
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_invalidate(dentry);
> +
>  	return 0;
>  }
>  
> @@ -3697,7 +3721,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
>  	inode_inc_iversion(dir);
> -	d_instantiate(dentry, inode);
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		d_add(dentry, inode);
> +	else
> +		d_instantiate(dentry, inode);
>  	dget(dentry);
>  	return 0;
>  
> @@ -4050,6 +4077,9 @@ enum shmem_param {
>  	Opt_usrquota_inode_hardlimit,
>  	Opt_grpquota_block_hardlimit,
>  	Opt_grpquota_inode_hardlimit,
> +	Opt_casefold_version,
> +	Opt_casefold,
> +	Opt_strict_encoding,
>  };
>  
>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -4081,9 +4111,53 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
>  	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
>  #endif
> +	fsparam_string("casefold",	Opt_casefold_version),
> +	fsparam_flag  ("casefold",	Opt_casefold),
> +	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
>  	{}
>  };
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)
> +{
> +	struct shmem_options *ctx = fc->fs_private;
> +	unsigned int version = UTF8_LATEST;
> +	struct unicode_map *encoding;
> +	char *version_str = param->string + 5;
> +
> +	if (!latest_version) {
> +		if (strncmp(param->string, "utf8-", 5))
> +			return invalfc(fc, "Only UTF-8 encodings are supported "
> +				       "in the format: utf8-<version number>");
> +
> +		version = utf8_parse_version(version_str);
> +		if (version < 0)
> +			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
> +	}
> +
> +	encoding = utf8_load(version);
> +
> +	if (IS_ERR(encoding)) {
> +		return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
> +		unicode_major(version), unicode_minor(version), unicode_rev(version));

bad indentation?

> +	}
> +
> +	pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n",
> +		unicode_major(version), unicode_minor(version), unicode_rev(version));
> +
> +	ctx->encoding = encoding;
> +
> +	return 0;
> +}
> +#else
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)
> +{
> +	return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
> +}
> +#endif
> +
>  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  {
>  	struct shmem_options *ctx = fc->fs_private;
> @@ -4242,6 +4316,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
>  				       "Group quota inode hardlimit too large.");
>  		ctx->qlimits.grpquota_ihardlimit = size;
>  		break;
> +	case Opt_casefold_version:
> +		return shmem_parse_opt_casefold(fc, param, false);
> +	case Opt_casefold:
> +		return shmem_parse_opt_casefold(fc, param, true);
> +	case Opt_strict_encoding:
> +		ctx->strict_encoding = true;

Using strict_encoding without encoding should be explicitly forbidden.

> +		break;
>  	}
>  	return 0;
>  
> @@ -4471,6 +4552,11 @@ static void shmem_put_super(struct super_block *sb)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (sb->s_encoding)
> +		utf8_unload(sb->s_encoding);
> +#endif
> +
>  #ifdef CONFIG_TMPFS_QUOTA
>  	shmem_disable_quotas(sb);
>  #endif
> @@ -4481,6 +4567,17 @@ static void shmem_put_super(struct super_block *sb)
>  	sb->s_fs_info = NULL;
>  }
>  
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static const struct dentry_operations shmem_ci_dentry_ops = {
> +	.d_hash = generic_ci_d_hash,
> +	.d_compare = generic_ci_d_compare,
> +#ifdef CONFIG_FS_ENCRYPTION
> +	.d_revalidate = fscrypt_d_revalidate,
> +#endif
> +	.d_delete = always_delete_dentry,
> +};
> +#endif
> +
>  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  {
>  	struct shmem_options *ctx = fc->fs_private;
> @@ -4515,9 +4612,21 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	}
>  	sb->s_export_op = &shmem_export_ops;
>  	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (ctx->encoding) {
> +		sb->s_encoding = ctx->encoding;
> +		sb->s_d_op = &shmem_ci_dentry_ops;
> +		if (ctx->strict_encoding)
> +			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
> +	}
>  #else
> -	sb->s_flags |= SB_NOUSER;
> +	sb->s_d_op = &simple_dentry_operations;

Moving simple_dentry_operations to be set at s_d_op should be a separate
patch.

It is a change that has non-obvious side effects (i.e. the way we
treat the root dentry) so it needs proper review by itself.  It is
also not related to the rest of the case-insensitive patch.

Also, why is it done only for CONFIG_UNICODE=n?

>  #endif
> +
> +#else
> +	sb->s_flags |= SB_NOUSER;
> +#endif /* CONFIG_TMPFS */
>  	sbinfo->max_blocks = ctx->blocks;
>  	sbinfo->max_inodes = ctx->inodes;
>  	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
> @@ -4791,6 +4900,8 @@ int shmem_init_fs_context(struct fs_context *fc)
>  	ctx->uid = current_fsuid();
>  	ctx->gid = current_fsgid();
>  
> +	ctx->encoding = NULL;
> +
>  	fc->fs_private = ctx;
>  	fc->ops = &shmem_fs_context_ops;
>  	return 0;
kernel test robot Sept. 13, 2024, 4:28 p.m. UTC | #2
Hi André,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on tytso-ext4/dev brauner-vfs/vfs.all linus/master v6.11-rc7 next-20240913]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/libfs-Create-the-helper-function-generic_ci_validate_strict_name/20240911-224740
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240911144502.115260-8-andrealmeid%40igalia.com
patch subject: [PATCH v4 07/10] tmpfs: Add casefold lookup support
config: i386-buildonly-randconfig-005-20240913 (https://download.01.org/0day-ci/archive/20240914/202409140037.lwGxfq6h-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240914/202409140037.lwGxfq6h-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409140037.lwGxfq6h-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/shmem.c:4723:18: error: use of undeclared identifier 'fscrypt_d_revalidate'
    4723 |         .d_revalidate = fscrypt_d_revalidate,
         |                         ^
   1 error generated.


vim +/fscrypt_d_revalidate +4723 mm/shmem.c

  4717	
  4718	#if IS_ENABLED(CONFIG_UNICODE)
  4719	static const struct dentry_operations shmem_ci_dentry_ops = {
  4720		.d_hash = generic_ci_d_hash,
  4721		.d_compare = generic_ci_d_compare,
  4722	#ifdef CONFIG_FS_ENCRYPTION
> 4723		.d_revalidate = fscrypt_d_revalidate,
  4724	#endif
  4725		.d_delete = always_delete_dentry,
  4726	};
  4727	#endif
  4728
kernel test robot Sept. 13, 2024, 6:25 p.m. UTC | #3
Hi André,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on tytso-ext4/dev brauner-vfs/vfs.all linus/master v6.11-rc7 next-20240913]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/libfs-Create-the-helper-function-generic_ci_validate_strict_name/20240911-224740
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240911144502.115260-8-andrealmeid%40igalia.com
patch subject: [PATCH v4 07/10] tmpfs: Add casefold lookup support
config: csky-randconfig-002-20240913 (https://download.01.org/0day-ci/archive/20240914/202409140236.RR9Gbvqh-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240914/202409140236.RR9Gbvqh-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409140236.RR9Gbvqh-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> mm/shmem.c:4719:39: warning: 'shmem_ci_dentry_ops' defined but not used [-Wunused-const-variable=]
    4719 | static const struct dentry_operations shmem_ci_dentry_ops = {
         |                                       ^~~~~~~~~~~~~~~~~~~


vim +/shmem_ci_dentry_ops +4719 mm/shmem.c

  4717	
  4718	#if IS_ENABLED(CONFIG_UNICODE)
> 4719	static const struct dentry_operations shmem_ci_dentry_ops = {
  4720		.d_hash = generic_ci_d_hash,
  4721		.d_compare = generic_ci_d_compare,
  4722	#ifdef CONFIG_FS_ENCRYPTION
  4723		.d_revalidate = fscrypt_d_revalidate,
  4724	#endif
  4725		.d_delete = always_delete_dentry,
  4726	};
  4727	#endif
  4728
André Almeida Oct. 2, 2024, 1:40 a.m. UTC | #4
Hey Krisman,

Em 12/09/2024 16:04, Gabriel Krisman Bertazi escreveu:
> André Almeida <andrealmeid@igalia.com> writes:
> 

[...]

>> +#if IS_ENABLED(CONFIG_UNICODE)
>> +	if (ctx->encoding) {
>> +		sb->s_encoding = ctx->encoding;
>> +		sb->s_d_op = &shmem_ci_dentry_ops;
>> +		if (ctx->strict_encoding)
>> +			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
>> +	}
>>   #else
>> -	sb->s_flags |= SB_NOUSER;
>> +	sb->s_d_op = &simple_dentry_operations;
> 
> Moving simple_dentry_operations to be set at s_d_op should be a separate
> patch.
> 
> It is a change that has non-obvious side effects (i.e. the way we
> treat the root dentry) so it needs proper review by itself.  It is
> also not related to the rest of the case-insensitive patch.
> 

The idea of setting simple_dentry_operations come from my previous 
approach of having our own shmem_lookup(), replacing simple_lookup(). 
Now that we are settled to keep with simple_lookup() anyway (that 
already sets simple_dentry_operations), I think we don't need this 
change anymore, right?

This will be set for every dentry that doesn't have a 
dentry->d_sb->s_d_op. Case-insensitive mount points will have this set, 
so we don't risk overwriting it.
Gabriel Krisman Bertazi Oct. 2, 2024, 9:29 p.m. UTC | #5
André Almeida <andrealmeid@igalia.com> writes:

> Hey Krisman,
>
> Em 12/09/2024 16:04, Gabriel Krisman Bertazi escreveu:
>> André Almeida <andrealmeid@igalia.com> writes:
>> 
>
> [...]
>
>>> +#if IS_ENABLED(CONFIG_UNICODE)
>>> +	if (ctx->encoding) {
>>> +		sb->s_encoding = ctx->encoding;
>>> +		sb->s_d_op = &shmem_ci_dentry_ops;
>>> +		if (ctx->strict_encoding)
>>> +			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
>>> +	}
>>>   #else
>>> -	sb->s_flags |= SB_NOUSER;
>>> +	sb->s_d_op = &simple_dentry_operations;
>> Moving simple_dentry_operations to be set at s_d_op should be a
>> separate
>> patch.
>> It is a change that has non-obvious side effects (i.e. the way we
>> treat the root dentry) so it needs proper review by itself.  It is
>> also not related to the rest of the case-insensitive patch.
>> 
>
> The idea of setting simple_dentry_operations come from my previous
> approach of having our own shmem_lookup(), replacing
> simple_lookup(). Now that we are settled to keep with simple_lookup()
> anyway (that already sets simple_dentry_operations), I think we don't
> need this change anymore, right?

Up to you, really. If you don't need it to support casefold lookup in
tmpfs, it doesn't need to be part of the same patchset.

> This will be set for every dentry that doesn't have a
> dentry->d_sb->s_d_op. Case-insensitive mount points will have this set,
> so we don't risk overwriting it.

I encourage you to send a new version with this.  makes sense to me.
diff mbox series

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index 5a77acf6ac6a..4fde63596ab3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -40,6 +40,8 @@ 
 #include <linux/fs_parser.h>
 #include <linux/swapfile.h>
 #include <linux/iversion.h>
+#include <linux/unicode.h>
+#include <linux/parser.h>
 #include "swap.h"
 
 static struct vfsmount *shm_mnt __ro_after_init;
@@ -123,6 +125,8 @@  struct shmem_options {
 	bool noswap;
 	unsigned short quota_types;
 	struct shmem_quota_limits qlimits;
+	struct unicode_map *encoding;
+	bool strict_encoding;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
@@ -3427,6 +3431,10 @@  shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
+	if (IS_ENABLED(CONFIG_UNICODE) &&
+	    !generic_ci_validate_strict_name(dir, &dentry->d_name))
+		return -EINVAL;
+
 	error = simple_acl_create(dir, inode);
 	if (error)
 		goto out_iput;
@@ -3442,7 +3450,12 @@  shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
+
 	dget(dentry); /* Extra count - pin the dentry in core */
 	return error;
 
@@ -3533,7 +3546,10 @@  static int shmem_link(struct dentry *old_dentry, struct inode *dir,
 	inc_nlink(inode);
 	ihold(inode);	/* New dentry reference */
 	dget(dentry);	/* Extra pinning count for the created dentry */
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 out:
 	return ret;
 }
@@ -3553,6 +3569,14 @@  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	inode_inc_iversion(dir);
 	drop_nlink(inode);
 	dput(dentry);	/* Undo the count from "create" - does all the work */
+
+	/*
+	 * For now, VFS can't deal with case-insensitive negative dentries, so
+	 * we invalidate them
+	 */
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_invalidate(dentry);
+
 	return 0;
 }
 
@@ -3697,7 +3721,10 @@  static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
 
@@ -4050,6 +4077,9 @@  enum shmem_param {
 	Opt_usrquota_inode_hardlimit,
 	Opt_grpquota_block_hardlimit,
 	Opt_grpquota_inode_hardlimit,
+	Opt_casefold_version,
+	Opt_casefold,
+	Opt_strict_encoding,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -4081,9 +4111,53 @@  const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
 	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
 #endif
+	fsparam_string("casefold",	Opt_casefold_version),
+	fsparam_flag  ("casefold",	Opt_casefold),
+	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
 	{}
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	struct shmem_options *ctx = fc->fs_private;
+	unsigned int version = UTF8_LATEST;
+	struct unicode_map *encoding;
+	char *version_str = param->string + 5;
+
+	if (!latest_version) {
+		if (strncmp(param->string, "utf8-", 5))
+			return invalfc(fc, "Only UTF-8 encodings are supported "
+				       "in the format: utf8-<version number>");
+
+		version = utf8_parse_version(version_str);
+		if (version < 0)
+			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
+	}
+
+	encoding = utf8_load(version);
+
+	if (IS_ERR(encoding)) {
+		return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
+		unicode_major(version), unicode_minor(version), unicode_rev(version));
+	}
+
+	pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n",
+		unicode_major(version), unicode_minor(version), unicode_rev(version));
+
+	ctx->encoding = encoding;
+
+	return 0;
+}
+#else
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
+}
+#endif
+
 static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4242,6 +4316,13 @@  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 				       "Group quota inode hardlimit too large.");
 		ctx->qlimits.grpquota_ihardlimit = size;
 		break;
+	case Opt_casefold_version:
+		return shmem_parse_opt_casefold(fc, param, false);
+	case Opt_casefold:
+		return shmem_parse_opt_casefold(fc, param, true);
+	case Opt_strict_encoding:
+		ctx->strict_encoding = true;
+		break;
 	}
 	return 0;
 
@@ -4471,6 +4552,11 @@  static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (sb->s_encoding)
+		utf8_unload(sb->s_encoding);
+#endif
+
 #ifdef CONFIG_TMPFS_QUOTA
 	shmem_disable_quotas(sb);
 #endif
@@ -4481,6 +4567,17 @@  static void shmem_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
+#if IS_ENABLED(CONFIG_UNICODE)
+static const struct dentry_operations shmem_ci_dentry_ops = {
+	.d_hash = generic_ci_d_hash,
+	.d_compare = generic_ci_d_compare,
+#ifdef CONFIG_FS_ENCRYPTION
+	.d_revalidate = fscrypt_d_revalidate,
+#endif
+	.d_delete = always_delete_dentry,
+};
+#endif
+
 static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4515,9 +4612,21 @@  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 	sb->s_export_op = &shmem_export_ops;
 	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (ctx->encoding) {
+		sb->s_encoding = ctx->encoding;
+		sb->s_d_op = &shmem_ci_dentry_ops;
+		if (ctx->strict_encoding)
+			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
+	}
 #else
-	sb->s_flags |= SB_NOUSER;
+	sb->s_d_op = &simple_dentry_operations;
 #endif
+
+#else
+	sb->s_flags |= SB_NOUSER;
+#endif /* CONFIG_TMPFS */
 	sbinfo->max_blocks = ctx->blocks;
 	sbinfo->max_inodes = ctx->inodes;
 	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
@@ -4791,6 +4900,8 @@  int shmem_init_fs_context(struct fs_context *fc)
 	ctx->uid = current_fsuid();
 	ctx->gid = current_fsgid();
 
+	ctx->encoding = NULL;
+
 	fc->fs_private = ctx;
 	fc->ops = &shmem_fs_context_ops;
 	return 0;