diff mbox series

[BlueZ,3/5] shared/util: Introduce strisutf8

Message ID 20230712193854.1862996-3-luiz.dentz@gmail.com (mailing list archive)
State Accepted
Commit d1b33eb8bf0f502761844d09dc64803ed5c9e687
Headers show
Series [BlueZ,1/5] shared/ad: Use util_iov_push_* helpers to generate data | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/CheckPatch success CheckPatch PASS
tedd_an/GitLint success Gitlint PASS
tedd_an/IncrementalBuild success Incremental Build PASS

Commit Message

Luiz Augusto von Dentz July 12, 2023, 7:38 p.m. UTC
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>

This introduces strisutf8, which can be used to verify whether a string is
encoded in UTF-8, and strstrip, which strips leading and trailing whitespace.
---
 src/shared/util.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++
 src/shared/util.h |  3 +++
 2 files changed, 65 insertions(+)
diff mbox series

Patch

diff --git a/src/shared/util.c b/src/shared/util.c
index 4d1c0d00545d..e9c1c18f5ea7 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -1705,3 +1705,65 @@  int strsuffix(const char *str, const char *suffix)
 
 	return strncmp(str + len - suffix_len, suffix, suffix_len);
 }
+
+/* Strip leading and trailing whitespace from str in place. Returns a pointer
+ * to the first non-space character of str, or NULL if str is NULL. Bytes
+ * are cast to unsigned char because isspace() is undefined for negative
+ * values, which plain char may hold.
+ */
+char *strstrip(char *str)
+{
+	size_t size;
+
+	if (!str)
+		return NULL;
+
+	/* Trim by length so no pointer before the start of str is formed */
+	size = strlen(str);
+	while (size && isspace((unsigned char)str[size - 1]))
+		str[--size] = '\0';
+
+	while (isspace((unsigned char)*str))
+		str++;
+
+	return str;
+}
+
+/* Return true if the len bytes at str are structurally valid UTF-8 (lead
+ * byte plus continuation bytes); never reads at or past str + len.
+ */
+bool strisutf8(const char *str, size_t len)
+{
+	size_t i = 0;
+
+	while (i < len) {
+		unsigned char c = str[i];
+		size_t size = 0;
+
+		/* Check the first byte to determine the number of bytes in the
+		 * UTF-8 character.
+		 */
+		if ((c & 0x80) == 0x00)
+			size = 1;
+		else if ((c & 0xE0) == 0xC0)
+			size = 2;
+		else if ((c & 0xF0) == 0xE0)
+			size = 3;
+		else if ((c & 0xF8) == 0xF0)
+			size = 4;
+		else
+			return false;
+
+		/* Check the following bytes: each must lie inside the buffer
+		 * (i + j < len) and be a 10xxxxxx continuation byte.
+		 */
+		for (size_t j = 1; j < size; ++j) {
+			if (i + j >= len || (str[i + j] & 0xC0) != 0x80)
+				return false;
+		}
+
+		i += size;
+	}
+
+	return true;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index ce57b53be9ef..c37b0f7296ab 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -12,6 +12,7 @@ 
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <alloca.h>
 #include <byteswap.h>
 #include <string.h>
@@ -88,6 +89,8 @@  do {						\
 
 char *strdelimit(char *str, char *del, char c);
 int strsuffix(const char *str, const char *suffix);
+char *strstrip(char *str);
+bool strisutf8(const char *str, size_t length);
 
 void *util_malloc(size_t size);
 void *util_memdup(const void *src, size_t size);