Message ID | 20190427163007.5113-3-samuel.thibault@ens-lyon.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [PATCHv3,1/2] ui/curses: Do not assume wchar_t contains unicode | expand |
On 27.04.2019 18:30, Samuel Thibault wrote: > E.g. BSD and Solaris even use locale-specific encoding there. > > We thus have to go through the native multibyte representation and use > mbtowc/wctomb to make a proper conversion. > > Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org> Both patches work for me on NetBSD/amd64 8.99.37 for qemu-system-x86_64. Borders are printed correctly. Regarding the patch I'm not sure whether to use MB_LEN_MAX or MB_CUR_MAX? I'm also unsure whether to reset conversion state between a multibyte character and wide character, with: `mbtowc(NULL, 0, 0);`. It's recommended to use in code examples examples. I think it doesn't make any harm to use it. I'm not sure if this is related, but "qemu-system-hppa -curses" is broken for me. I didn't use it in the past as it just recently acquired NetBSD guest support. (lldb) bt * thread #1, stop reason = signal SIGSEGV frame #0: 0x00007f7ff6c1fb98 libcurses.so.7`wmove(win=0x0000000000000000, y=0, x=0) at move.c:71 frame #1: 0x00007f7ff6c0ca9b libcurses.so.7`mvwadd_wchnstr(win=0x0000000000000000, y=<unavailable>, x=<unavailable>, wchstr=0x00007f7fffffe020, n=0) at add_wchstr.c:123 * frame #2: 0x000000000078629e qemu-system-hppa`curses_update(dcl=0x00007f7ff7bd8bc0, x=0, y=0, w=79, h=24) at curses.c:86:9 frame #3: 0x0000000000753dae qemu-system-hppa`dpy_text_update(con=0x00007f7ff7bae580, x=0, y=0, w=79, h=24) at console.c:1658:13 frame #4: 0x0000000000758abe qemu-system-hppa`text_console_update(opaque=0x00007f7ff7bae580, chardata=0x000000000118e490) at console.c:1264:9 frame #5: 0x0000000000751ef8 qemu-system-hppa`graphic_hw_text_update(con=0x00007f7ff7bae580, chardata=0x000000000118c690) at console.c:389:9 frame #6: 0x0000000000785bcb qemu-system-hppa`curses_refresh(dcl=0x00007f7ff7bd8bc0) at curses.c:273:5 frame #7: 0x0000000000758248 qemu-system-hppa`dpy_refresh(s=0x00007f7ff7bd8770) at console.c:1622:13 frame #8: 0x000000000075809d qemu-system-hppa`gui_update(opaque=0x00007f7ff7bd8770) at console.c:205:5 frame #9: 0x00000000008d9f4d qemu-system-hppa`timerlist_run_timers(timer_list=0x00007f7ff7e57d20) at qemu-timer.c:574:9 frame #10: 0x00000000008da01d qemu-system-hppa`qemu_clock_run_timers(type=QEMU_CLOCK_REALTIME) at qemu-timer.c:588:12 frame #11: 0x00000000008da4ea qemu-system-hppa`qemu_clock_run_all_timers at qemu-timer.c:708:25 frame #12: 0x00000000008da962 qemu-system-hppa`main_loop_wait(nonblocking=0) at main-loop.c:519:5 frame #13: 0x00000000005570a4 qemu-system-hppa`main_loop at vl.c:1970:9 frame #14: 0x0000000000551fa4 qemu-system-hppa`main(argc=2, argv=0x00007f7fffffe768, envp=0x00007f7fffffe780) at vl.c:4604:5 frame #15: 0x000000000040e7ad qemu-system-hppa`___start + 280 (lldb) p screenpad (WINDOW *) $2 = 0x0000000000000000 We pass NULL window argument to mvwadd_wchnstr(3) and crash. Can you reproduce it locally? I will try to investigate it. > --- > ui/curses.c | 151 ++++++++++++++++++++++++++++++++-------------------- > 1 file changed, 94 insertions(+), 57 deletions(-) > > diff --git a/ui/curses.c b/ui/curses.c > index fb63945188..395f9545e9 100644 > --- a/ui/curses.c > +++ b/ui/curses.c > @@ -400,65 +400,102 @@ static void curses_atexit(void) > endwin(); > } > > +/* > + * In the following: > + * - fch is the font glyph number > + * - uch is the unicode value > + * - wch is the wchar_t value (may not be unicode, e.g. on BSD/solaris) > + * - mbch is the native local-dependent multibyte representation > + */ > + > /* Setup wchar glyph for one UCS-2 char */ > -static void convert_ucs(int glyph, uint16_t ch, iconv_t conv) > +static void convert_ucs(unsigned char fch, uint16_t uch, iconv_t conv) > { > + char mbch[MB_CUR_MAX]; > wchar_t wch; > - char *pch, *pwch; > - size_t sch, swch; > - > - pch = (char *) &ch; > - pwch = (char *) &wch; > - sch = sizeof(ch); > - swch = sizeof(wch); > + char *puch, *pmbch; > + size_t such, smbch; > + > + puch = (char *) &uch; > + pmbch = (char *) mbch; > + such = sizeof(uch); > + smbch = sizeof(mbch); > + > + if (iconv(conv, &puch, &such, &pmbch, &smbch) == (size_t) -1) { > + fprintf(stderr, "Could not convert 0x%04x " > + "from UCS-2 to a multibyte character: %s\n", > + uch, strerror(errno)); > + return; > + } > > - if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) { > - fprintf(stderr, "Could not convert 0x%04x from UCS-2 to WCHAR_T: %s\n", > - ch, strerror(errno)); > - } else { > - vga_to_curses[glyph].chars[0] = wch; > + if (mbtowc(&wch, mbch, sizeof(mbch) - smbch) == -1) { > + fprintf(stderr, "Could not convert 0x%04x " > + "from a multibyte character to wchar_t: %s\n", > + uch, strerror(errno)); > + return; > } > + vga_to_curses[fch].chars[0] = wch; > } > > /* Setup wchar glyph for one font character */ > -static void convert_font(unsigned char ch, iconv_t conv) > +static void convert_font(unsigned char fch, iconv_t conv) > { > + char mbch[MB_CUR_MAX]; > wchar_t wch; > - char *pch, *pwch; > - size_t sch, swch; > - > - pch = (char *) &ch; > - pwch = (char *) &wch; > - sch = sizeof(ch); > - swch = sizeof(wch); > + char *pfch, *pmbch; > + size_t sfch, smbch; > + > + pfch = (char *) &fch; > + pmbch = (char *) &mbch; > + sfch = sizeof(fch); > + smbch = sizeof(mbch); > + > + if (iconv(conv, &pfch, &sfch, &pmbch, &smbch) == (size_t) -1) { > + fprintf(stderr, "Could not convert font glyph 0x%02x " > + "from %s to a multibyte character: %s\n", > + fch, font_charset, strerror(errno)); > + return; > + } > > - if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) { > - fprintf(stderr, "Could not convert 0x%02x from %s to WCHAR_T: %s\n", > - ch, font_charset, strerror(errno)); > - } else { > - vga_to_curses[ch].chars[0] = wch; > + if (mbtowc(&wch, mbch, sizeof(mbch) - smbch) == -1) { > + fprintf(stderr, "Could not convert font glyph 0x%02x " > + "from a multibyte character to wchar_t: %s\n", > + fch, strerror(errno)); > + return; > } > + vga_to_curses[fch].chars[0] = wch; > } > > /* Convert one wchar to UCS-2 */ > static uint16_t get_ucs(wchar_t wch, iconv_t conv) > { > - uint16_t ch; > - char *pch, *pwch; > - size_t sch, swch; > - > - pch = (char *) &ch; > - pwch = (char *) &wch; > - sch = sizeof(ch); > - swch = sizeof(wch); > - > - if (iconv(conv, &pwch, &swch, &pch, &sch) == (size_t) -1) { > - fprintf(stderr, "Could not convert 0x%02lx from WCHAR_T to UCS-2: %s\n", > - (unsigned long)wch, strerror(errno)); > + char mbch[MB_CUR_MAX]; > + uint16_t uch; > + char *pmbch, *puch; > + size_t smbch, such; > + int ret; > + > + ret = wctomb(mbch, wch); > + if (ret == -1) { > + fprintf(stderr, "Could not convert 0x%04x " > + "from wchar_t to a multibyte character: %s\n", > + wch, strerror(errno)); > + return 0xFFFD; > + } > + > + pmbch = (char *) mbch; > + puch = (char *) &uch; > + smbch = ret; > + such = sizeof(uch); > + > + if (iconv(conv, &pmbch, &smbch, &puch, &such) == (size_t) -1) { > + fprintf(stderr, "Could not convert 0x%04x " > + "from a multibyte character to UCS-2 : %s\n", > + wch, strerror(errno)); > return 0xFFFD; > } > > - return ch; > + return uch; > } > > /* > @@ -466,6 +503,11 @@ static uint16_t get_ucs(wchar_t wch, iconv_t conv) > */ > static void font_setup(void) > { > + iconv_t ucs2_to_nativecharset; > + iconv_t nativecharset_to_ucs2; > + iconv_t font_conv; > + int i; > + > /* > * Control characters are normally non-printable, but VGA does have > * well-known glyphs for them. > @@ -505,30 +547,25 @@ static void font_setup(void) > 0x25bc > }; > > - iconv_t ucs_to_wchar_conv; > - iconv_t wchar_to_ucs_conv; > - iconv_t font_conv; > - int i; > - > - ucs_to_wchar_conv = iconv_open("WCHAR_T", "UCS-2"); > - if (ucs_to_wchar_conv == (iconv_t) -1) { > + ucs2_to_nativecharset = iconv_open(nl_langinfo(CODESET), "UCS-2"); > + if (ucs2_to_nativecharset == (iconv_t) -1) { > fprintf(stderr, "Could not convert font glyphs from UCS-2: '%s'\n", > strerror(errno)); > exit(1); > } > > - wchar_to_ucs_conv = iconv_open("UCS-2", "WCHAR_T"); > - if (wchar_to_ucs_conv == (iconv_t) -1) { > - iconv_close(ucs_to_wchar_conv); > + nativecharset_to_ucs2 = iconv_open("UCS-2", nl_langinfo(CODESET)); > + if (nativecharset_to_ucs2 == (iconv_t) -1) { > + iconv_close(ucs2_to_nativecharset); > fprintf(stderr, "Could not convert font glyphs to UCS-2: '%s'\n", > strerror(errno)); > exit(1); > } > > - font_conv = iconv_open("WCHAR_T", font_charset); > + font_conv = iconv_open(nl_langinfo(CODESET), font_charset); > if (font_conv == (iconv_t) -1) { > - iconv_close(ucs_to_wchar_conv); > - iconv_close(wchar_to_ucs_conv); > + iconv_close(ucs2_to_nativecharset); > + iconv_close(nativecharset_to_ucs2); > fprintf(stderr, "Could not convert font glyphs from %s: '%s'\n", > font_charset, strerror(errno)); > exit(1); > @@ -536,7 +573,7 @@ static void font_setup(void) > > /* Control characters */ > for (i = 0; i <= 0x1F; i++) { > - convert_ucs(i, control_characters[i], ucs_to_wchar_conv); > + convert_ucs(i, control_characters[i], ucs2_to_nativecharset); > } > > for (i = 0x20; i <= 0xFF; i++) { > @@ -544,12 +581,12 @@ static void font_setup(void) > } > > /* DEL */ > - convert_ucs(0x7F, 0x2302, ucs_to_wchar_conv); > + convert_ucs(0x7F, 0x2302, ucs2_to_nativecharset); > > if (strcmp(nl_langinfo(CODESET), "UTF-8")) { > /* Non-Unicode capable, use termcap equivalents for those available */ > for (i = 0; i <= 0xFF; i++) { > - switch (get_ucs(vga_to_curses[i].chars[0], wchar_to_ucs_conv)) { > + switch (get_ucs(vga_to_curses[i].chars[0], nativecharset_to_ucs2)) { > case 0x00a3: > vga_to_curses[i] = *WACS_STERLING; > break; > @@ -649,8 +686,8 @@ static void font_setup(void) > } > } > } > - iconv_close(ucs_to_wchar_conv); > - iconv_close(wchar_to_ucs_conv); > + iconv_close(ucs2_to_nativecharset); > + iconv_close(nativecharset_to_ucs2); > iconv_close(font_conv); > } > >
Kamil Rytarowski, le sam. 27 avril 2019 19:36:40 +0200, a ecrit: > On 27.04.2019 18:30, Samuel Thibault wrote: > > E.g. BSD and Solaris even use locale-specific encoding there. > > > > We thus have to go through the native multibyte representation and use > > mbtowc/wctomb to make a proper conversion. > > > > Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org> > > Both patches work for me on NetBSD/amd64 8.99.37 for qemu-system-x86_64. > Borders are printed correctly. > > Regarding the patch I'm not sure whether to use MB_LEN_MAX or MB_CUR_MAX? I don't know if qemu can afford VLA? > I'm also unsure whether to reset conversion state between a multibyte > character and wide character, with: `mbtowc(NULL, 0, 0);`. It's > recommended to use in code examples examples. I think it doesn't make > any harm to use it. Mmm, better yet, we should actually use mbrtowc and wcrtomb. I have fixed this in my tree. > I'm not sure if this is related, but "qemu-system-hppa -curses" is > broken for me. I didn't use it in the past as it just recently acquired > NetBSD guest support. > > (lldb) bt > libcurses.so.7`mvwadd_wchnstr(win=0x0000000000000000, y=<unavailable>, > x=<unavailable>, wchstr=0x00007f7fffffe020, n=0) at add_wchstr.c:123 > * frame #2: 0x000000000078629e > qemu-system-hppa`curses_update(dcl=0x00007f7ff7bd8bc0, x=0, y=0, w=79, > h=24) at curses.c:86:9 > frame #3: 0x0000000000753dae > qemu-system-hppa`dpy_text_update(con=0x00007f7ff7bae580, x=0, y=0, w=79, > (lldb) p screenpad > (WINDOW *) $2 = 0x0000000000000000 I don't think this is related at all, screenpad management is another matter. Samuel
On 27.04.2019 19:57, Samuel Thibault wrote: > Kamil Rytarowski, le sam. 27 avril 2019 19:36:40 +0200, a ecrit: >> On 27.04.2019 18:30, Samuel Thibault wrote: >>> E.g. BSD and Solaris even use locale-specific encoding there. >>> >>> We thus have to go through the native multibyte representation and use >>> mbtowc/wctomb to make a proper conversion. >>> >>> Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org> >> >> Both patches work for me on NetBSD/amd64 8.99.37 for qemu-system-x86_64. >> Borders are printed correctly. >> >> Regarding the patch I'm not sure whether to use MB_LEN_MAX or MB_CUR_MAX? > > I don't know if qemu can afford VLA? > It's better to avoid VLA and pick MB_LEN_MAX. >> I'm also unsure whether to reset conversion state between a multibyte >> character and wide character, with: `mbtowc(NULL, 0, 0);`. It's >> recommended to use in code examples examples. I think it doesn't make >> any harm to use it. > > Mmm, better yet, we should actually use mbrtowc and wcrtomb. I have > fixed this in my tree. > This is even better. >> I'm not sure if this is related, but "qemu-system-hppa -curses" is >> broken for me. I didn't use it in the past as it just recently acquired >> NetBSD guest support. >> >> (lldb) bt >> libcurses.so.7`mvwadd_wchnstr(win=0x0000000000000000, y=<unavailable>, >> x=<unavailable>, wchstr=0x00007f7fffffe020, n=0) at add_wchstr.c:123 >> * frame #2: 0x000000000078629e >> qemu-system-hppa`curses_update(dcl=0x00007f7ff7bd8bc0, x=0, y=0, w=79, >> h=24) at curses.c:86:9 >> frame #3: 0x0000000000753dae >> qemu-system-hppa`dpy_text_update(con=0x00007f7ff7bae580, x=0, y=0, w=79, > >> (lldb) p screenpad >> (WINDOW *) $2 = 0x0000000000000000 > > I don't think this is related at all, screenpad management is another > matter. > OK! I will treat it as an independent issue and try to address it. > Samuel >
diff --git a/ui/curses.c b/ui/curses.c index fb63945188..395f9545e9 100644 --- a/ui/curses.c +++ b/ui/curses.c @@ -400,65 +400,102 @@ static void curses_atexit(void) endwin(); } +/* + * In the following: + * - fch is the font glyph number + * - uch is the unicode value + * - wch is the wchar_t value (may not be unicode, e.g. on BSD/solaris) + * - mbch is the native local-dependent multibyte representation + */ + /* Setup wchar glyph for one UCS-2 char */ -static void convert_ucs(int glyph, uint16_t ch, iconv_t conv) +static void convert_ucs(unsigned char fch, uint16_t uch, iconv_t conv) { + char mbch[MB_CUR_MAX]; wchar_t wch; - char *pch, *pwch; - size_t sch, swch; - - pch = (char *) &ch; - pwch = (char *) &wch; - sch = sizeof(ch); - swch = sizeof(wch); + char *puch, *pmbch; + size_t such, smbch; + + puch = (char *) &uch; + pmbch = (char *) mbch; + such = sizeof(uch); + smbch = sizeof(mbch); + + if (iconv(conv, &puch, &such, &pmbch, &smbch) == (size_t) -1) { + fprintf(stderr, "Could not convert 0x%04x " + "from UCS-2 to a multibyte character: %s\n", + uch, strerror(errno)); + return; + } - if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) { - fprintf(stderr, "Could not convert 0x%04x from UCS-2 to WCHAR_T: %s\n", - ch, strerror(errno)); - } else { - vga_to_curses[glyph].chars[0] = wch; + if (mbtowc(&wch, mbch, sizeof(mbch) - smbch) == -1) { + fprintf(stderr, "Could not convert 0x%04x " + "from a multibyte character to wchar_t: %s\n", + uch, strerror(errno)); + return; } + vga_to_curses[fch].chars[0] = wch; } /* Setup wchar glyph for one font character */ -static void convert_font(unsigned char ch, iconv_t conv) +static void convert_font(unsigned char fch, iconv_t conv) { + char mbch[MB_CUR_MAX]; wchar_t wch; - char *pch, *pwch; - size_t sch, swch; - - pch = (char *) &ch; - pwch = (char *) &wch; - sch = sizeof(ch); - swch = sizeof(wch); + char *pfch, *pmbch; + size_t sfch, smbch; + + pfch = (char *) &fch; + pmbch = (char *) &mbch; + sfch = sizeof(fch); + smbch = sizeof(mbch); + + if (iconv(conv, &pfch, &sfch, &pmbch, &smbch) == (size_t) -1) { + fprintf(stderr, "Could not convert font glyph 0x%02x " + "from %s to a multibyte character: %s\n", + fch, font_charset, strerror(errno)); + return; + } - if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) { - fprintf(stderr, "Could not convert 0x%02x from %s to WCHAR_T: %s\n", - ch, font_charset, strerror(errno)); - } else { - vga_to_curses[ch].chars[0] = wch; + if (mbtowc(&wch, mbch, sizeof(mbch) - smbch) == -1) { + fprintf(stderr, "Could not convert font glyph 0x%02x " + "from a multibyte character to wchar_t: %s\n", + fch, strerror(errno)); + return; } + vga_to_curses[fch].chars[0] = wch; } /* Convert one wchar to UCS-2 */ static uint16_t get_ucs(wchar_t wch, iconv_t conv) { - uint16_t ch; - char *pch, *pwch; - size_t sch, swch; - - pch = (char *) &ch; - pwch = (char *) &wch; - sch = sizeof(ch); - swch = sizeof(wch); - - if (iconv(conv, &pwch, &swch, &pch, &sch) == (size_t) -1) { - fprintf(stderr, "Could not convert 0x%02lx from WCHAR_T to UCS-2: %s\n", - (unsigned long)wch, strerror(errno)); + char mbch[MB_CUR_MAX]; + uint16_t uch; + char *pmbch, *puch; + size_t smbch, such; + int ret; + + ret = wctomb(mbch, wch); + if (ret == -1) { + fprintf(stderr, "Could not convert 0x%04x " + "from wchar_t to a multibyte character: %s\n", + wch, strerror(errno)); + return 0xFFFD; + } + + pmbch = (char *) mbch; + puch = (char *) &uch; + smbch = ret; + such = sizeof(uch); + + if (iconv(conv, &pmbch, &smbch, &puch, &such) == (size_t) -1) { + fprintf(stderr, "Could not convert 0x%04x " + "from a multibyte character to UCS-2 : %s\n", + wch, strerror(errno)); return 0xFFFD; } - return ch; + return uch; } /* @@ -466,6 +503,11 @@ static uint16_t get_ucs(wchar_t wch, iconv_t conv) */ static void font_setup(void) { + iconv_t ucs2_to_nativecharset; + iconv_t nativecharset_to_ucs2; + iconv_t font_conv; + int i; + /* * Control characters are normally non-printable, but VGA does have * well-known glyphs for them. @@ -505,30 +547,25 @@ static void font_setup(void) 0x25bc }; - iconv_t ucs_to_wchar_conv; - iconv_t wchar_to_ucs_conv; - iconv_t font_conv; - int i; - - ucs_to_wchar_conv = iconv_open("WCHAR_T", "UCS-2"); - if (ucs_to_wchar_conv == (iconv_t) -1) { + ucs2_to_nativecharset = iconv_open(nl_langinfo(CODESET), "UCS-2"); + if (ucs2_to_nativecharset == (iconv_t) -1) { fprintf(stderr, "Could not convert font glyphs from UCS-2: '%s'\n", strerror(errno)); exit(1); } - wchar_to_ucs_conv = iconv_open("UCS-2", "WCHAR_T"); - if (wchar_to_ucs_conv == (iconv_t) -1) { - iconv_close(ucs_to_wchar_conv); + nativecharset_to_ucs2 = iconv_open("UCS-2", nl_langinfo(CODESET)); + if (nativecharset_to_ucs2 == (iconv_t) -1) { + iconv_close(ucs2_to_nativecharset); fprintf(stderr, "Could not convert font glyphs to UCS-2: '%s'\n", strerror(errno)); exit(1); } - font_conv = iconv_open("WCHAR_T", font_charset); + font_conv = iconv_open(nl_langinfo(CODESET), font_charset); if (font_conv == (iconv_t) -1) { - iconv_close(ucs_to_wchar_conv); - iconv_close(wchar_to_ucs_conv); + iconv_close(ucs2_to_nativecharset); + iconv_close(nativecharset_to_ucs2); fprintf(stderr, "Could not convert font glyphs from %s: '%s'\n", font_charset, strerror(errno)); exit(1); @@ -536,7 +573,7 @@ static void font_setup(void) /* Control characters */ for (i = 0; i <= 0x1F; i++) { - convert_ucs(i, control_characters[i], ucs_to_wchar_conv); + convert_ucs(i, control_characters[i], ucs2_to_nativecharset); } for (i = 0x20; i <= 0xFF; i++) { @@ -544,12 +581,12 @@ static void font_setup(void) } /* DEL */ - convert_ucs(0x7F, 0x2302, ucs_to_wchar_conv); + convert_ucs(0x7F, 0x2302, ucs2_to_nativecharset); if (strcmp(nl_langinfo(CODESET), "UTF-8")) { /* Non-Unicode capable, use termcap equivalents for those available */ for (i = 0; i <= 0xFF; i++) { - switch (get_ucs(vga_to_curses[i].chars[0], wchar_to_ucs_conv)) { + switch (get_ucs(vga_to_curses[i].chars[0], nativecharset_to_ucs2)) { case 0x00a3: vga_to_curses[i] = *WACS_STERLING; break; @@ -649,8 +686,8 @@ static void font_setup(void) } } } - iconv_close(ucs_to_wchar_conv); - iconv_close(wchar_to_ucs_conv); + iconv_close(ucs2_to_nativecharset); + iconv_close(nativecharset_to_ucs2); iconv_close(font_conv); }
E.g. BSD and Solaris even use locale-specific encoding there. We thus have to go through the native multibyte representation and use mbtowc/wctomb to make a proper conversion. Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org> --- ui/curses.c | 151 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 57 deletions(-)