diff mbox series

client: handle wide chars for table rows

Message ID 20221008075509.26252-1-xdavidwuph@gmail.com (mailing list archive)
State New
Headers show
Series client: handle wide chars for table rows | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-alpine-ci-fetch success Fetch PR
prestwoj/iwd-ci-gitlint success GitLint
prestwoj/iwd-ci-fetch success Fetch PR
prestwoj/iwd-ci-makedistcheck success Make Distcheck
prestwoj/iwd-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-ci-build success Build - Configure
prestwoj/iwd-alpine-ci-makedistcheck success Make Distcheck
prestwoj/iwd-alpine-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-alpine-ci-build success Build - Configure
prestwoj/iwd-ci-makecheckvalgrind fail Make FAIL: client/display.c: In function ‘next_line’: client/display.c:410:36: error: comparison of integer expressions of different signedness: ‘unsigned int’ and ‘int’ [-Werror=sign-compare] 410 | for (i = 0; i <= *max && i != s_len; i++) { | ^~ cc1: all warnings being treated as errors make[1]: *** [Makefile:2408: client/display.o] Error 1 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:1587: all] Error 2
prestwoj/iwd-ci-testrunner pending testrunner SKIP
prestwoj/iwd-ci-makecheck pending makecheck SKIP
prestwoj/iwd-ci-clang fail Clang IWD - make FAIL: client/display.c:410:29: error: comparison of integers of different signs: 'unsigned int' and 'int' [-Werror,-Wsign-compare] for (i = 0; i <= *max && i != s_len; i++) { ~ ^ ~~~~~ 1 error generated. make[1]: *** [Makefile:2408: client/display.o] Error 1 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:1587: all] Error 2
prestwoj/iwd-alpine-ci-makecheckvalgrind fail Make FAIL: client/display.c: In function 'next_line': client/display.c:410:36: error: comparison of integer expressions of different signedness: 'unsigned int' and 'int' [-Werror=sign-compare] 410 | for (i = 0; i <= *max && i != s_len; i++) { | ^~ cc1: all warnings being treated as errors make[1]: *** [Makefile:2409: client/display.o] Error 1 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:1588: all] Error 2
prestwoj/iwd-alpine-ci-makecheck pending makecheck SKIP

Commit Message

Pinghao Wu Oct. 8, 2022, 7:55 a.m. UTC
Try to find out printing width of wide chars using current locale and
its byte length before falling back to non-codepoint UTF-8 bytes method.
---
 client/display.c | 16 +++++++++++++++-
 client/main.c    |  3 +++
 2 files changed, 18 insertions(+), 1 deletion(-)

Comments

James Prestwood Oct. 10, 2022, 3:57 p.m. UTC | #1
Hi Pinghao,

On Sat, 2022-10-08 at 15:55 +0800, Pinghao Wu wrote:
> Try to find out printing width of wide chars using current locale and
> its byte length before falling back to non-codepoint UTF-8 bytes
> method.

So IWD itself only supports utf-8 so I'm not sure adding support for
wide chars in iwctl is really useful.

This function is for printing table rows and any data in these rows
comes from DBus, which will always be utf8.

Unless you have some other use case I hadn't thought of?

Thanks,
James
Pinghao Wu Oct. 10, 2022, 4:09 p.m. UTC | #2
The current non-codepoint UTF-8 method assumes that a codepoint occupies
one column while printing. This is not true for some characters like
Chinese characters, which occupies two columns.

The motivation of this patch is to handle these "wide" (in printing
columns) chars properly via wcwidth(). It's not actually about
non-UTF-8, but it is also useful for that case.
James Prestwood Oct. 10, 2022, 4:55 p.m. UTC | #3
On Tue, 2022-10-11 at 00:09 +0800, Pinghao Wu wrote:
> The current non-codepoint UTF-8 method assumes that a codepoint
> occupies
> one column while printing. This is not true for some characters like
> Chinese characters, which occupies two columns.

Ah ok, in this case it may be best to combine you're changes with the
non-codepoint check since you're changes are more complete for handling
utf-8. I'll review the original patch.

> 
> The motivation of this patch is to handle these "wide" (in printing
> columns) chars properly via wcwidth(). It's not actually about
> non-UTF-8, but it is also useful for that case.
James Prestwood Oct. 10, 2022, 5 p.m. UTC | #4
On Sat, 2022-10-08 at 15:55 +0800, Pinghao Wu wrote:
> Try to find out printing width of wide chars using current locale and
> its byte length before falling back to non-codepoint UTF-8 bytes
> method.
> ---
>  client/display.c | 16 +++++++++++++++-
>  client/main.c    |  3 +++
>  2 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/client/display.c b/client/display.c
> index b729ad4c..dcbe11f8 100644
> --- a/client/display.c
> +++ b/client/display.c
> @@ -26,10 +26,13 @@
>  
>  #define _GNU_SOURCE
>  #include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
>  #include <signal.h>
>  #include <sys/stat.h>
>  #include <sys/ioctl.h>
>  #include <unistd.h>
> +#include <wchar.h>
>  
>  #include <readline/history.h>
>  #include <readline/readline.h>
> @@ -401,15 +404,26 @@ static char* next_line(char *s, unsigned int
> *max, char **color_out)
>         unsigned int i;
>         int last_space = -1;
>         int last_color = -1;
> +       int s_len = strlen(s);
>  
>         /* Find the last space before 'max', as well as any color */
> -       for (i = 0; i <= *max && s[i] != '\0'; i++) {
> +       for (i = 0; i <= *max && i != s_len; i++) {
>                 if (s[i] == ' ')
>                         last_space = i;
>                 else if (s[i] == 0x1b) {
>                         /* color escape won't count for column width
> */
>                         *max += color_end(s + i);
>                         last_color = i;
> +               /* Non-ASCII, try wchar */
> +               } else if (s[i] & 0x80) {
> +                       wchar_t w;
> +                       int w_mblen;
> +                       if ((w_mblen = mbtowc(&w, &s[i], s_len - i))

Instead of mbtowc() use l_utf8_get_codepoint() (an ELL utility). Since
we assume the input is going to be utf-8 always.

> > 0) {
> +                               /* Compensate max bytes for wide char
> */
> +                               *max += w_mblen - wcwidth(w);
> +                               /* Skip other bytes for this wchar */
> +                               i += w_mblen - 1;
> +                       }
>                 /* Add width for non-codepoint UTF-8 bytes */
>                 } else if (((uint8_t)s[i] >> 6) == 2)
>                         *max += 1;

And you can go ahead and remove this if-clause entirely since the above
case should now cover it. Plus the s[i] & 0x80 check above actually
prevents this branch from ever being taken.

> diff --git a/client/main.c b/client/main.c
> index df5c0a61..7e8dead4 100644
> --- a/client/main.c
> +++ b/client/main.c
> @@ -25,6 +25,7 @@
>  #endif
>  
>  #include <errno.h>
> +#include <locale.h>
>  #include <signal.h>
>  #include <ell/ell.h>
>  
> @@ -50,6 +51,8 @@ int main(int argc, char *argv[])
>  {
>         bool all_done;
>  
> +       setlocale(LC_CTYPE, "");
> +
>         if (!l_main_init())
>                 return EXIT_FAILURE;
>  

Thanks,
James
diff mbox series

Patch

diff --git a/client/display.c b/client/display.c
index b729ad4c..dcbe11f8 100644
--- a/client/display.c
+++ b/client/display.c
@@ -26,10 +26,13 @@ 
 
 #define _GNU_SOURCE
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <signal.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <unistd.h>
+#include <wchar.h>
 
 #include <readline/history.h>
 #include <readline/readline.h>
@@ -401,15 +404,26 @@  static char* next_line(char *s, unsigned int *max, char **color_out)
 	unsigned int i;
 	int last_space = -1;
 	int last_color = -1;
+	int s_len = strlen(s);
 
 	/* Find the last space before 'max', as well as any color */
-	for (i = 0; i <= *max && s[i] != '\0'; i++) {
+	for (i = 0; i <= *max && i != s_len; i++) {
 		if (s[i] == ' ')
 			last_space = i;
 		else if (s[i] == 0x1b) {
 			/* color escape won't count for column width */
 			*max += color_end(s + i);
 			last_color = i;
+		/* Non-ASCII, try wchar */
+		} else if (s[i] & 0x80) {
+			wchar_t w;
+			int w_mblen;
+			if ((w_mblen = mbtowc(&w, &s[i], s_len - i)) > 0) {
+				/* Compensate max bytes for wide char */
+				*max += w_mblen - wcwidth(w);
+				/* Skip other bytes for this wchar */
+				i += w_mblen - 1;
+			}
 		/* Add width for non-codepoint UTF-8 bytes */
 		} else if (((uint8_t)s[i] >> 6) == 2)
 			*max += 1;
diff --git a/client/main.c b/client/main.c
index df5c0a61..7e8dead4 100644
--- a/client/main.c
+++ b/client/main.c
@@ -25,6 +25,7 @@ 
 #endif
 
 #include <errno.h>
+#include <locale.h>
 #include <signal.h>
 #include <ell/ell.h>
 
@@ -50,6 +51,8 @@  int main(int argc, char *argv[])
 {
 	bool all_done;
 
+	setlocale(LC_CTYPE, "");
+
 	if (!l_main_init())
 		return EXIT_FAILURE;