diff mbox series

[v2,2/3] configure.ac: define ICONV_OMITS_BOM if necessary

Message ID 7c2c6f060393abf3ac831837e4bf8ed366c2fa69.1572596278.git.congdanhqx@gmail.com (mailing list archive)
State New, archived
Headers show
Series Linux with musl libc improvement | expand

Commit Message

Đoàn Trần Công Danh Nov. 1, 2019, 8:25 a.m. UTC
From commit 79444c9294, ("utf8: handle systems that don't write BOM for
UTF-16", 2019-02-12), we're supporting those systems with iconv that
omits BOM with:

    make ICONV_OMITS_BOM=Yes

However, typing the flag all the time is cumbersome and error-prone.

Add a checking into configure script to detect this flag automatically.

Signed-off-by: Doan Tran Cong Danh <congdanhqx@gmail.com>
---
 configure.ac | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

Comments

Jeff King Nov. 1, 2019, 4:56 p.m. UTC | #1
On Fri, Nov 01, 2019 at 03:25:10PM +0700, Doan Tran Cong Danh wrote:

> From commit 79444c9294, ("utf8: handle systems that don't write BOM for
> UTF-16", 2019-02-12), we're supporting those systems with iconv that
> omits BOM with:
> 
>     make ICONV_OMITS_BOM=Yes
> 
> However, typing the flag all the time is cumbersome and error-prone.
> 
> Add a checking into configure script to detect this flag automatically.

s/checking/check/

The patch looks good to me, though my knowledge of both iconv and
autoconf is relatively lacking. I'll assume it works on your system,
though. :)

One thing I noticed:

> +	v = (unsigned char)(out[0]) + (unsigned char)(out[1]);
> +	return v != 0xfe + 0xff;

I wondered at first why not just:

  return v[0] != 0xfe || v[1] != 0xff;

but the answer is that you are catching BOMs of either endianness. We
might at some point need to distinguish the two, but I think for the
purposes of OMITS_BOM, we don't have to care. So we can worry about that
later (if ever).

-Peff
Đoàn Trần Công Danh Nov. 2, 2019, 12:43 a.m. UTC | #2
On 2019-11-01 12:56:57 -0400, Jeff King wrote:
> On Fri, Nov 01, 2019 at 03:25:10PM +0700, Doan Tran Cong Danh wrote:
> > +	v = (unsigned char)(out[0]) + (unsigned char)(out[1]);
> > +	return v != 0xfe + 0xff;
> 
> I wondered at first why not just:
> 
>   return v[0] != 0xfe || v[1] != 0xff;
> 
> but the answer is that you are catching BOMs of either endianness. We
> might at some point need to distinguish the two,

Yes, it's about catching BOM of either endianess. To distinguish
between LE and BE, it'll be better to use AC_C_BIGENDIAN instead (it's
friendlier for cross-compiling).

Anyway, we couldn't compare out[0] and out[1] with 0xf{e,f} directly
because char could be either signed (-2 and -1) or unsigned (0xfe, 0xff).
We must cast it to unsigned char (required to be 8 bits by POSIX),
then let the integer promotion promote it to int (at least 16 bits).
diff mbox series

Patch

diff --git a/configure.ac b/configure.ac
index a43b476402..ecba7e6e51 100644
--- a/configure.ac
+++ b/configure.ac
@@ -844,12 +844,61 @@  AC_MSG_CHECKING([for old iconv()])
 AC_COMPILE_IFELSE([OLDICONVTEST_SRC],
 	[AC_MSG_RESULT([no])],
 	[AC_MSG_RESULT([yes])
+	AC_DEFINE(HAVE_OLD_ICONV, 1)
 	OLD_ICONV=UnfortunatelyYes])
 
 GIT_UNSTASH_FLAGS($ICONVDIR)
 
 GIT_CONF_SUBST([OLD_ICONV])
 
+#
+# Define ICONV_OMITS_BOM if you are on a system which
+# iconv omits bom for utf-{16,32}
+if test -z "$NO_ICONV"; then
+AC_CACHE_CHECK([whether iconv omits bom for utf-16 and utf-32],
+ [ac_cv_iconv_omits_bom],
+[
+old_LIBS="$LIBS"
+if test -n "$NEEDS_LIBICONV"; then
+	LIBS="$LIBS -liconv"
+fi
+
+AC_RUN_IFELSE(
+	[AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT
+	#include <iconv.h>
+	#ifdef HAVE_OLD_ICONV
+	typedef const char *iconv_ibp;
+	#else
+	typedef char *iconv_ibp;
+	#endif
+	],
+	[[
+	int v;
+	iconv_t conv;
+	char in[] = "a"; iconv_ibp pin = in;
+	char out[20] = ""; char *pout = out;
+	size_t isz = sizeof in;
+	size_t osz = sizeof out;
+
+	conv = iconv_open("UTF-16", "UTF-8");
+	iconv(conv, &pin, &isz, &pout, &osz);
+	iconv_close(conv);
+	v = (unsigned char)(out[0]) + (unsigned char)(out[1]);
+	return v != 0xfe + 0xff;
+	]])],
+	[ac_cv_iconv_omits_bom=no],
+	[ac_cv_iconv_omits_bom=yes])
+
+LIBS="$old_LIBS"
+])
+if test "x$ac_cv_iconv_omits_bom" = xyes; then
+	ICONV_OMITS_BOM=Yes
+else
+	ICONV_OMITS_BOM=
+fi
+GIT_CONF_SUBST([ICONV_OMITS_BOM])
+fi
+
 ## Checks for typedefs, structures, and compiler characteristics.
 AC_MSG_NOTICE([CHECKS for typedefs, structures, and compiler characteristics])
 #