diff mbox series

[v3,6/9] tmpfs: Add casefold lookup support

Message ID 20240905190252.461639-7-andrealmeid@igalia.com (mailing list archive)
State New
Headers show
Series tmpfs: Add case-insensitive support for tmpfs | expand

Commit Message

André Almeida Sept. 5, 2024, 7:02 p.m. UTC
Enable casefold lookup in tmpfs, based on the encoding defined by
userspace. That means that instead of comparing a file name byte by
byte, it compares the case-insensitive equivalents of the Unicode
strings.

* Dcache handling

There's a special need when dealing with case-insensitive dentries.
First of all, we currently invalidate every negative casefold dentry.
That happens because the VFS code currently has no proper support for
dealing with them, given that it could incorrectly reuse a previous
filename for a new file that has a casefold match. For instance, this
could happen:

$ mkdir DIR
$ rm -r DIR
$ mkdir dir
$ ls
DIR/

This would be perceived as an inconsistency from the userspace point of
view, because even though we match files in a case-insensitive manner,
we still honor whatever the initial filename is.

Along with that, tmpfs stores only the first equivalent name dentry used
in the dcache, preventing duplications of dentries in the dcache. The
d_compare() version for casefold files uses a normalized string, so the
filename under lookup will be compared to another normalized string for
the existing file, achieving a casefolded lookup.

* Enabling casefold via mount options

Most filesystems have their data stored on disk, so the casefold option
needs to be enabled when building a filesystem on a device (via mkfs).
However, as tmpfs is a RAM-backed filesystem, there's no on-disk
information and thus no mkfs to store information about casefold.

For tmpfs, create casefold options for mounting. Userspace can then
enable casefold support for a mount point using:

$ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/

Userspace must specify which version of the Unicode standard it is
targeting. The available options depend on what the kernel Unicode
subsystem supports.

And for strict encoding:

$ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/

Strict encoding means that tmpfs will refuse to create file names
containing invalid UTF-8 sequences. When this option is not enabled, any
name with an invalid sequence will be treated as an opaque byte
sequence, ignoring the encoding, and thus cannot be looked up in a
case-insensitive way.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
Changes from v2:
- shmem_lookup() now sets d_ops
- reworked shmem_parse_opt_casefold()
- if `mount -o casefold` has no param, load latest UTF-8 version
- using (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) when possible
---
 mm/shmem.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 138 insertions(+), 4 deletions(-)

Comments

Gabriel Krisman Bertazi Sept. 5, 2024, 9:28 p.m. UTC | #1
Hi,

André Almeida <andrealmeid@igalia.com> writes:
> @@ -3427,6 +3431,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  	if (IS_ERR(inode))
>  		return PTR_ERR(inode);
>  
> +	if (IS_ENABLED(CONFIG_UNICODE))
> +		if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
> +			return -EINVAL;
> +

if (IS_ENABLED(CONFIG_UNICODE) &&
    generic_ci_validate_strict_name(dir, &dentry->d_name))

>  static const struct constant_table shmem_param_enums_huge[] = {
> @@ -4081,9 +4111,62 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>  	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
>  	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
>  #endif
> +	fsparam_string("casefold",	Opt_casefold_version),
> +	fsparam_flag  ("casefold",	Opt_casefold),
> +	fsparam_flag  ("strict_encoding", Opt_strict_encoding),

I don't know if it is possible, but can we do it with a single parameter?

> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)

Instead of the boolean, can't you check if param->string != NULL? (Real
question, I've never used fs_parameter.)

> +{
> +	struct shmem_options *ctx = fc->fs_private;
> +	unsigned int maj = 0, min = 0, rev = 0, version = 0;
> +	struct unicode_map *encoding;
> +	char *version_str = param->string + 5;
> +	int ret;

unsigned int version = UTF8_LATEST;

and kill the if/else below:
> +
> +	if (latest_version) {
> +		version = UTF8_LATEST;
> +	} else {
> +		if (strncmp(param->string, "utf8-", 5))
> +			return invalfc(fc, "Only UTF-8 encodings are supported "
> +				       "in the format: utf8-<version number>");
> +
> +		ret = utf8_parse_version(version_str, &maj, &min, &rev);

utf8_parse_version interface could return UNICODE_AGE() already, so we hide the details
from the caller. wdyt?

> +		if (ret)
> +			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
> +
> +		version = UNICODE_AGE(maj, min, rev);
> +	}
> +
> +	encoding = utf8_load(version);
> +
> +	if (IS_ERR(encoding)) {
> +		if (latest_version)
> +			return invalfc(fc, "Failed loading latest UTF-8 version");
> +		else
> +			return invalfc(fc, "Failed loading UTF-8 version: %s", version_str);

The following covers both legs (untested):

if (IS_ERR(encoding))
  return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
	           unicode_maj(version), unicode_min(version), unicode_rev(version));

> +	if (latest_version)
> +		pr_info("tmpfs: Using the latest UTF-8 version available");
> +	else
> +		pr_info("tmpfs: Using encoding provided by mount
> options: %s\n", param->string);

The following covers both legs (untested):

pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n",
        unicode_maj(version), unicode_min(version), unicode_rev(version));

> +
> +	ctx->encoding = encoding;
> +
> +	return 0;
> +}
> +#else
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
> +				    bool latest_version)
> +{
> +	return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
> +}

A message like "Kernel not built with CONFIG_UNICODE" immediately tells
you how to fix it.

> @@ -4515,6 +4610,16 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	}
>  	sb->s_export_op = &shmem_export_ops;
>  	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (ctx->encoding) {
> +		sb->s_encoding = ctx->encoding;
> +		generic_set_sb_d_ops(sb);

This is the right place for setting d_ops (see the next comment), but you
should be loading generic_ci_always_del_dentry_ops, right?

Also, since generic_ci_always_del_dentry_ops is only used by this one,
can you move it to this file?

> +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
> +{
> +	const struct dentry_operations *d_ops = &simple_dentry_operations;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (dentry->d_sb->s_encoding)
> +		d_ops = &generic_ci_always_del_dentry_ops;
> +#endif

This needs to be done at mount time through sb->s_d_op. See

https://lore.kernel.org/all/20240221171412.10710-1-krisman@suse.de/

I suppose we can do it at mount-time for
generic_ci_always_del_dentry_ops and simple_dentry_operations.

> +
> +	if (dentry->d_name.len > NAME_MAX)
> +		return ERR_PTR(-ENAMETOOLONG);
> +
> +	if (!dentry->d_sb->s_d_op)
> +		d_set_d_op(dentry, d_ops);
> +
> +	/*
> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
> +	 * we prevent them from being created
> +	 */
> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +		return NULL;

Thinking out loud:

I misunderstood always_delete_dentry before.  It removes negative
dentries right after the lookup, since ->d_delete is called on dput.

But you still need this check here, IMO, to prevent the negative dentry
from ever being hashed. Otherwise it can be found by a concurrent
lookup.  And you cannot drop ->d_delete from the case-insensitive
operations either, because we still want it for !IS_CASEFOLDED(dir).

The window is that, without this code, the negative dentry would
be hashed in d_add() and a concurrent lookup might find it between that
time and the dput(), where it is removed at the end of the concurrent
lookup.

All of this would hopefully go away with negative dentry support for
casefolded directories.

> +
> +	d_add(dentry, NULL);
> +
> +	return NULL;
> +}

The sole reason you are doing this custom function is to exclude negative
dentries from casefolded directories. I doubt we care about the extra
check being done.  Can we just do it in simple_lookup?

> +
>  static const struct inode_operations shmem_dir_inode_operations = {
>  #ifdef CONFIG_TMPFS
>  	.getattr	= shmem_getattr,
>  	.create		= shmem_create,
> -	.lookup		= simple_lookup,
> +	.lookup		= shmem_lookup,
>  	.link		= shmem_link,
>  	.unlink		= shmem_unlink,
>  	.symlink	= shmem_symlink,
> @@ -4791,6 +4923,8 @@ int shmem_init_fs_context(struct fs_context *fc)
>  	ctx->uid = current_fsuid();
>  	ctx->gid = current_fsgid();
>  
> +	ctx->encoding = NULL;
> +
>  	fc->fs_private = ctx;
>  	fc->ops = &shmem_fs_context_ops;
>  	return 0;
André Almeida Sept. 6, 2024, 2:59 p.m. UTC | #2
Hey!

On 9/5/24 18:28, Gabriel Krisman Bertazi wrote:
> Hi,
>
> André Almeida <andrealmeid@igalia.com> writes:
>> @@ -3427,6 +3431,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>>   	if (IS_ERR(inode))
>>   		return PTR_ERR(inode);
>>   
>> +	if (IS_ENABLED(CONFIG_UNICODE))
>> +		if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
>> +			return -EINVAL;
>> +
> if (IS_ENABLED(CONFIG_UNICODE) &&
>      generic_ci_validate_strict_name(dir, &dentry->d_name))
>
>>   static const struct constant_table shmem_param_enums_huge[] = {
>> @@ -4081,9 +4111,62 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
>>   	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
>>   	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
>>   #endif
>> +	fsparam_string("casefold",	Opt_casefold_version),
>> +	fsparam_flag  ("casefold",	Opt_casefold),
>> +	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
> I don't know if it is possible, but can we do it with a single parameter?

I tried, but when you use casefold with no args, the code fails 
somewhere before that, claiming that there's no arg.

>> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
>> +				    bool latest_version)
> Instead of the boolean, can't you check if param->string != NULL? (real
> question, I never used fs_parameter.
>
>> +{
>> +	struct shmem_options *ctx = fc->fs_private;
>> +	unsigned int maj = 0, min = 0, rev = 0, version = 0;
>> +	struct unicode_map *encoding;
>> +	char *version_str = param->string + 5;
>> +	int ret;
> unsigned int version = UTF8_LATEST;
>
> and kill the if/else below:
>> +
>> +	if (latest_version) {
>> +		version = UTF8_LATEST;
>> +	} else {
>> +		if (strncmp(param->string, "utf8-", 5))
>> +			return invalfc(fc, "Only UTF-8 encodings are supported "
>> +				       "in the format: utf8-<version number>");
>> +
>> +		ret = utf8_parse_version(version_str, &maj, &min, &rev);
> utf8_parse_version interface could return UNICODE_AGE() already, so we hide the details
> from the caller. wdyt?

I like it!

>
>> +		if (ret)
>> +			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
>> +
>> +		version = UNICODE_AGE(maj, min, rev);
>> +	}
>> +
>> +	encoding = utf8_load(version);
>> +
>> +	if (IS_ERR(encoding)) {
>> +		if (latest_version)
>> +			return invalfc(fc, "Failed loading latest UTF-8 version");
>> +		else
>> +			return invalfc(fc, "Failed loading UTF-8 version: %s", version_str);
> The following covers both legs (untested):
>
> if (IS_ERR(encoding))
>    return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n"",
> 	           unicode_maj(version), unicode_min(version), unicode_rev(version));
>
>> +	if (latest_version)
>> +		pr_info("tmpfs: Using the latest UTF-8 version available");
>> +	else
>> +		pr_info("tmpfs: Using encoding provided by mount
>> options: %s\n", param->string);
> The following covers both legs (untested):
>
> pr_info (fc, "tmpfs: Using encoding : utf8-%u.%u.%u\n"
>           unicode_maj(version), unicode_min(version), unicode_rev(version));
>
>> +
>> +	ctx->encoding = encoding;
>> +
>> +	return 0;
>> +}
>> +#else
>> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
>> +				    bool latest_version)
>> +{
>> +	return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
>> +}
> A message like "Kernel not built with CONFIG_UNICODE" immediately tells
> you how to fix it.
>
>> @@ -4515,6 +4610,16 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>>   	}
>>   	sb->s_export_op = &shmem_export_ops;
>>   	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
>> +
>> +#if IS_ENABLED(CONFIG_UNICODE)
>> +	if (ctx->encoding) {
>> +		sb->s_encoding = ctx->encoding;
>> +		generic_set_sb_d_ops(sb);
> This is the right place for setting d_ops (see the next comment), but you
> should be loading generic_ci_always_del_dentry_ops, right?
>
> Also, since generic_ci_always_del_dentry_ops is only used by this one,
> can you move it to this file?
>
>> +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
>> +{
>> +	const struct dentry_operations *d_ops = &simple_dentry_operations;
>> +
>> +#if IS_ENABLED(CONFIG_UNICODE)
>> +	if (dentry->d_sb->s_encoding)
>> +		d_ops = &generic_ci_always_del_dentry_ops;
>> +#endif
> This needs to be done at mount time through sb->s_d_op. See
>
> https://lore.kernel.org/all/20240221171412.10710-1-krisman@suse.de/
>
> I suppose we can do it at mount-time for
> generic_ci_always_del_dentry_ops and simple_dentry_operations.
>
>> +
>> +	if (dentry->d_name.len > NAME_MAX)
>> +		return ERR_PTR(-ENAMETOOLONG);
>> +
>> +	if (!dentry->d_sb->s_d_op)
>> +		d_set_d_op(dentry, d_ops);
>> +
>> +	/*
>> +	 * For now, VFS can't deal with case-insensitive negative dentries, so
>> +	 * we prevent them from being created
>> +	 */
>> +	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
>> +		return NULL;
> Thinking out loud:
>
> I misunderstood always_delete_dentry before.  It removes negative
> dentries right after the lookup, since ->d_delete is called on dput.
>
> But you still need this check here, IMO, to prevent the negative dentry
> from ever being hashed. Otherwise it can be found by a concurrent
> lookup.  And you cannot drop ->d_delete from the case-insensitive
> operations too, because we still wants it for !IS_CASEFOLDED(dir).
>
> The window is that, without this code, the negative dentry dentry would
> be hashed in d_add() and a concurrent lookup might find it between that
> time and the d_put, where it is removed at the end of the concurrent
> lookup.
>
> All of this would hopefully go away with the negative dentry for
> casefolded directories.
>
>> +
>> +	d_add(dentry, NULL);
>> +
>> +	return NULL;
>> +}
> The sole reason you are doing this custom function is to exclude negative
> dentries from casefolded directories. I doubt we care about the extra
> check being done.  Can we just do it in simple_lookup?

So, in summary:

* set d_ops at mount time to generic_ci_always_del_dentry_ops
* use simple_lookup(), get rid of shmem_lookup()
* inside of simple_lookup(), add (IS_CASEFOLDED(dir)) return NULL

Right?

>> +
>>   static const struct inode_operations shmem_dir_inode_operations = {
>>   #ifdef CONFIG_TMPFS
>>   	.getattr	= shmem_getattr,
>>   	.create		= shmem_create,
>> -	.lookup		= simple_lookup,
>> +	.lookup		= shmem_lookup,
>>   	.link		= shmem_link,
>>   	.unlink		= shmem_unlink,
>>   	.symlink	= shmem_symlink,
>> @@ -4791,6 +4923,8 @@ int shmem_init_fs_context(struct fs_context *fc)
>>   	ctx->uid = current_fsuid();
>>   	ctx->gid = current_fsgid();
>>   
>> +	ctx->encoding = NULL;
>> +
>>   	fc->fs_private = ctx;
>>   	fc->ops = &shmem_fs_context_ops;
>>   	return 0;
Gabriel Krisman Bertazi Sept. 9, 2024, 2:15 p.m. UTC | #3
André Almeida <andrealmeid@igalia.com> writes:

>> The sole reason you are doing this custom function is to exclude negative
>> dentries from casefolded directories. I doubt we care about the extra
>> check being done.  Can we just do it in simple_lookup?
>
> So, in summary:
>
> * set d_ops at mount time to generic_ci_always_del_dentry_ops
> * use simple_lookup(), get rid of shmem_lookup()
> * inside of simple_lookup(), add (IS_CASEFOLDED(dir)) return NULL
>
> Right?

Yep, that's my suggestion.
diff mbox series

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index 5a77acf6ac6a..6b61fc5dc0b1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -40,6 +40,8 @@ 
 #include <linux/fs_parser.h>
 #include <linux/swapfile.h>
 #include <linux/iversion.h>
+#include <linux/unicode.h>
+#include <linux/parser.h>
 #include "swap.h"
 
 static struct vfsmount *shm_mnt __ro_after_init;
@@ -123,6 +125,8 @@  struct shmem_options {
 	bool noswap;
 	unsigned short quota_types;
 	struct shmem_quota_limits qlimits;
+	struct unicode_map *encoding;
+	bool strict_encoding;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
@@ -3427,6 +3431,10 @@  shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
+	if (IS_ENABLED(CONFIG_UNICODE))
+		if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
+			return -EINVAL;
+
 	error = simple_acl_create(dir, inode);
 	if (error)
 		goto out_iput;
@@ -3442,7 +3450,12 @@  shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
+
 	dget(dentry); /* Extra count - pin the dentry in core */
 	return error;
 
@@ -3533,7 +3546,10 @@  static int shmem_link(struct dentry *old_dentry, struct inode *dir,
 	inc_nlink(inode);
 	ihold(inode);	/* New dentry reference */
 	dget(dentry);	/* Extra pinning count for the created dentry */
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 out:
 	return ret;
 }
@@ -3553,6 +3569,14 @@  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	inode_inc_iversion(dir);
 	drop_nlink(inode);
 	dput(dentry);	/* Undo the count from "create" - does all the work */
+
+	/*
+	 * For now, VFS can't deal with case-insensitive negative dentries, so
+	 * we invalidate them
+	 */
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_invalidate(dentry);
+
 	return 0;
 }
 
@@ -3697,7 +3721,10 @@  static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
 
@@ -4050,6 +4077,9 @@  enum shmem_param {
 	Opt_usrquota_inode_hardlimit,
 	Opt_grpquota_block_hardlimit,
 	Opt_grpquota_inode_hardlimit,
+	Opt_casefold_version,
+	Opt_casefold,
+	Opt_strict_encoding,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -4081,9 +4111,62 @@  const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
 	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
 #endif
+	fsparam_string("casefold",	Opt_casefold_version),
+	fsparam_flag  ("casefold",	Opt_casefold),
+	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
 	{}
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	struct shmem_options *ctx = fc->fs_private;
+	unsigned int maj = 0, min = 0, rev = 0, version = 0;
+	struct unicode_map *encoding;
+	char *version_str = param->string + 5;
+	int ret;
+
+	if (latest_version) {
+		version = UTF8_LATEST;
+	} else {
+		if (strncmp(param->string, "utf8-", 5))
+			return invalfc(fc, "Only UTF-8 encodings are supported "
+				       "in the format: utf8-<version number>");
+
+		ret = utf8_parse_version(version_str, &maj, &min, &rev);
+		if (ret)
+			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
+
+		version = UNICODE_AGE(maj, min, rev);
+	}
+
+	encoding = utf8_load(version);
+
+	if (IS_ERR(encoding)) {
+		if (latest_version)
+			return invalfc(fc, "Failed loading latest UTF-8 version");
+		else
+			return invalfc(fc, "Failed loading UTF-8 version: %s", version_str);
+	}
+
+	if (latest_version)
+		pr_info("tmpfs: Using the latest UTF-8 version available");
+	else
+		pr_info("tmpfs: Using encoding provided by mount options: %s\n", param->string);
+
+	ctx->encoding = encoding;
+
+	return 0;
+}
+#else
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
+}
+#endif
+
 static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4242,6 +4325,13 @@  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 				       "Group quota inode hardlimit too large.");
 		ctx->qlimits.grpquota_ihardlimit = size;
 		break;
+	case Opt_casefold_version:
+		return shmem_parse_opt_casefold(fc, param, false);
+	case Opt_casefold:
+		return shmem_parse_opt_casefold(fc, param, true);
+	case Opt_strict_encoding:
+		ctx->strict_encoding = true;
+		break;
 	}
 	return 0;
 
@@ -4471,6 +4561,11 @@  static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (sb->s_encoding)
+		utf8_unload(sb->s_encoding);
+#endif
+
 #ifdef CONFIG_TMPFS_QUOTA
 	shmem_disable_quotas(sb);
 #endif
@@ -4515,6 +4610,16 @@  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 	sb->s_export_op = &shmem_export_ops;
 	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (ctx->encoding) {
+		sb->s_encoding = ctx->encoding;
+		generic_set_sb_d_ops(sb);
+		if (ctx->strict_encoding)
+			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
+	}
+#endif
+
 #else
 	sb->s_flags |= SB_NOUSER;
 #endif
@@ -4704,11 +4809,38 @@  static const struct inode_operations shmem_inode_operations = {
 #endif
 };
 
+static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+	const struct dentry_operations *d_ops = &simple_dentry_operations;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (dentry->d_sb->s_encoding)
+		d_ops = &generic_ci_always_del_dentry_ops;
+#endif
+
+	if (dentry->d_name.len > NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	if (!dentry->d_sb->s_d_op)
+		d_set_d_op(dentry, d_ops);
+
+	/*
+	 * For now, VFS can't deal with case-insensitive negative dentries, so
+	 * we prevent them from being created
+	 */
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		return NULL;
+
+	d_add(dentry, NULL);
+
+	return NULL;
+}
+
 static const struct inode_operations shmem_dir_inode_operations = {
 #ifdef CONFIG_TMPFS
 	.getattr	= shmem_getattr,
 	.create		= shmem_create,
-	.lookup		= simple_lookup,
+	.lookup		= shmem_lookup,
 	.link		= shmem_link,
 	.unlink		= shmem_unlink,
 	.symlink	= shmem_symlink,
@@ -4791,6 +4923,8 @@  int shmem_init_fs_context(struct fs_context *fc)
 	ctx->uid = current_fsuid();
 	ctx->gid = current_fsgid();
 
+	ctx->encoding = NULL;
+
 	fc->fs_private = ctx;
 	fc->ops = &shmem_fs_context_ops;
 	return 0;