diff mbox series

[RFC,v2,14/18] jfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option

Message ID 20221226142150.13324-15-pali@kernel.org (mailing list archive)
State New, archived
Headers show
Series fs: Remove usage of broken nls_utf8 and drop it | expand

Commit Message

Pali Rohár Dec. 26, 2022, 2:21 p.m. UTC
The NLS table for utf8 is broken and cannot be fixed.

So instead of the broken utf8 NLS functions char2uni() and uni2char(), use
the functions utf8s_to_utf16s() and utf16s_to_utf8s(), which implement
correct conversion between UTF-16 and UTF-8.

These functions also correctly process UTF-16 surrogate pairs, so after
this change the jfs driver is able to correctly handle file names with
4-byte UTF-8 sequences as well.

When iocharset=utf8 is used, set sbi->nls_tab to NULL and use that to
distinguish whether an NLS table or the native UTF-8 functions should be
used.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 fs/jfs/jfs_unicode.c | 17 +++++++++++++++--
 fs/jfs/super.c       | 24 +++++++++++++++---------
 2 files changed, 30 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c
index 2db923872bf1..0b0b80063a98 100644
--- a/fs/jfs/jfs_unicode.c
+++ b/fs/jfs/jfs_unicode.c
@@ -46,6 +46,9 @@  int jfs_strfromUCS_le(char *to, int maxlen, const __le16 * from,
 				}
 			}
 		}
+	} else {
+		outlen = utf16s_to_utf8s((const wchar_t *)from, len,
+					 UTF16_LITTLE_ENDIAN, to, maxlen-1);
 	}
 	to[outlen] = 0;
 	return outlen;
@@ -61,6 +64,7 @@  static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len,
 		struct nls_table *codepage)
 {
 	int charlen;
+	int outlen;
 	int i;
 
 	if (codepage) {
@@ -75,10 +79,19 @@  static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len,
 				return charlen;
 			}
 		}
+		outlen = i;
+	} else {
+		outlen = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
+					 to, len);
+		if (outlen < 1) {
+			jfs_err("jfs_strtoUCS: utf8s_to_utf16s returned %d.",
+				outlen);
+			return outlen;
+		}
 	}
 
-	to[i] = 0;
-	return i;
+	to[outlen] = 0;
+	return outlen;
 }
 
 /*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index a2bb3d5d3f69..f26460147b62 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -261,16 +261,20 @@  static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			/* Don't do anything ;-) */
 			break;
 		case Opt_iocharset:
-			if (nls_map && nls_map != (void *) -1)
+			if (nls_map && nls_map != (void *) -1) {
 				unload_nls(nls_map);
-			/* compatibility alias none means ISO-8859-1 */
-			if (strcmp(args[0].from, "none") == 0)
-				nls_map = load_nls("iso8859-1");
-			else
-				nls_map = load_nls(args[0].from);
-			if (!nls_map) {
-				pr_err("JFS: charset not found\n");
-				goto cleanup;
+				nls_map = NULL;
+			}
+			if (strcmp(args[0].from, "utf8") != 0) {
+				/* compatibility alias none means ISO-8859-1 */
+				if (strcmp(args[0].from, "none") == 0)
+					nls_map = load_nls("iso8859-1");
+				else
+					nls_map = load_nls(args[0].from);
+				if (!nls_map) {
+					pr_err("JFS: charset not found\n");
+					goto cleanup;
+				}
 			}
 			break;
 		case Opt_resize:
@@ -713,6 +717,8 @@  static int jfs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",discard=%u", sbi->minblks_trim);
 	if (sbi->nls_tab)
 		seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset);
+	else
+		seq_puts(seq, ",iocharset=utf8");
 	if (sbi->flag & JFS_ERR_CONTINUE)
 		seq_printf(seq, ",errors=continue");
 	if (sbi->flag & JFS_ERR_PANIC)