diff mbox series

[v3,03/13] expand: Count multi-byte characters for VSLENGTH

Message ID bc7a793eae26aa07fd0697af90728e078f92c4b6.1714900377.git.herbert@gondor.apana.org.au (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Headers show
Series Add multi-byte support | expand

Commit Message

Herbert Xu May 5, 2024, 9:14 a.m. UTC
Count multi-byte characters in variables rather than bytes,
and return that count as the result of the length expansion.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c | 62 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/src/expand.c b/src/expand.c
index 9ac981e..ad186b0 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -53,6 +53,7 @@ 
 #endif
 #include <ctype.h>
 #include <stdbool.h>
+#include <wchar.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -796,6 +797,18 @@  really_record:
 	return p;
 }
 
+static char *chtodest(int c, int flags, char *out)
+{
+	const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+
+	if ((flags & QUOTES_ESC) &&
+	    ((syntax[c] == CCTL) ||
+	     (flags & EXP_QUOTED && syntax[c] == CBACK)))
+		USTPUTC(CTLESC, out);
+	USTPUTC(c, out);
+
+	return out;
+}
 
 /*
  * Put a string on the stack.
@@ -803,38 +816,48 @@  really_record:
 
 static size_t memtodest(const char *p, size_t len, int flags)
 {
-	const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+	size_t count = 0;
 	char *q;
-	char *s;
+	int c;
 
 	if (unlikely(!len))
 		return 0;
 
 	q = makestrspace(len * 2, expdest);
-	s = q;
 
 	do {
-		int c = (signed char)*p++;
-		if (c) {
-			if ((flags & QUOTES_ESC) &&
-			    ((syntax[c] == CCTL) ||
-			     (flags & EXP_QUOTED && syntax[c] == CBACK)))
-				USTPUTC(CTLESC, q);
-		} else if (!(flags & EXP_KEEPNUL))
+		c = (signed char)*p++;
+
+		if (c)
+			count++;
+		else if (!(flags & EXP_KEEPNUL))
 			continue;
-		USTPUTC(c, q);
+
+		if (c < 0) {
+			mbstate_t mbs = {};
+
+			p--;
+			do {
+				q = chtodest(c, flags, q);
+			} while (mbrlen(p++, 1, &mbs) == -2 &&
+				 (c = *p, --len));
+			if (!len)
+				break;
+			continue;
+		}
+
+		q = chtodest(c, flags, q);
 	} while (--len);
 
 	expdest = q;
-	return q - s;
+	return count;
 }
 
 
 static size_t strtodest(const char *p, int flags)
 {
 	size_t len = strlen(p);
-	memtodest(p, len, flags);
-	return len;
+	return memtodest(p, len, flags);
 }
 
 
@@ -856,6 +879,7 @@  varvalue(char *name, int varflags, int flags, int quoted)
 	int discard = (subtype == VSPLUS || subtype == VSLENGTH) |
 		      (flags & EXP_DISCARD);
 	ssize_t len = 0;
+	size_t start;
 	char c;
 
 	if (!subtype) {
@@ -865,9 +889,9 @@  varvalue(char *name, int varflags, int flags, int quoted)
 		sh_error("Bad substitution");
 	}
 
-	flags |= EXP_KEEPNUL;
 	flags &= discard ? ~QUOTES_ESC : ~0;
 	sep = (flags & EXP_FULL) << CHAR_BIT;
+	start = expdest - (char *)stackblock();
 
 	switch (*name) {
 	case '$':
@@ -927,7 +951,7 @@  param:
 
 			if (*ap && sep) {
 				len++;
-				memtodest(&sepc, 1, flags);
+				memtodest(&sepc, 1, flags | EXP_KEEPNUL);
 			}
 		}
 		break;
@@ -957,7 +981,7 @@  value:
 	}
 
 	if (discard)
-		STADJUST(-len, expdest);
+		expdest = (char *)stackblock() + start;
 
 	return len;
 }
@@ -1758,11 +1782,13 @@  casematch(union node *pattern, char *val)
 
 static size_t cvtnum(intmax_t num, int flags)
 {
+	size_t start = expdest - (char *)stackblock();
 	int len = max_int_length(sizeof(num));
 	char buf[len];
 
 	len = fmtstr(buf, len, "%" PRIdMAX, num);
-	return memtodest(buf, len, flags);
+	memtodest(buf, len, flags);
+	return (expdest - (char *)stackblock()) - start;
 }
 
 STATIC void