diff mbox series

[v3,2/3] mailinfo.c: avoid strlen on strings that can contains NUL

Message ID 97ede4aab2ece936eab3caf06db5a8f5d0eea4ba.1587426713.git.congdanhqx@gmail.com (mailing list archive)
State New, archived
Headers show
Series Disallow NUL character in mailinfo | expand

Commit Message

Đoàn Trần Công Danh April 20, 2020, 11:54 p.m. UTC
We're passing buffer from strbuf to reencode_string,
which will call strlen(3) on that buffer,
and discard the length of newly created buffer.
Then, we compute the length of the return buffer to attach to strbuf.

During this process, we introduce a discrimination between mail
originally written in utf-8 and other encoding.

* if the email was written in utf-8, we leave it as is. If there is
  a NUL character in that line, we complains loudly:

  	error: a NUL byte in commit log message not allowed.

* if the email was written in other encoding, we truncate the data as
  the NUL character in that line, then we used the truncated line for
  the metadata.

We can do better by reusing all the available information,
and call the underlying lower level function that will be called
indirectly by reencode_string. By doing this, we will also postpone
the NUL character processing to the commit step, which will
complains about the faulty metadata.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c            |  6 ++++--
 t/t4254-am-corrupt.sh | 44 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/mailinfo.c b/mailinfo.c
index 742fa376ab..0e9911df6d 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -447,19 +447,21 @@  static int convert_to_utf8(struct mailinfo *mi,
 			   struct strbuf *line, const char *charset)
 {
 	char *out;
+	size_t out_len;
 
 	if (!mi->metainfo_charset || !charset || !*charset)
 		return 0;
 
 	if (same_encoding(mi->metainfo_charset, charset))
 		return 0;
-	out = reencode_string(line->buf, mi->metainfo_charset, charset);
+	out = reencode_string_len(line->buf, line->len,
+				  mi->metainfo_charset, charset, &out_len);
 	if (!out) {
 		mi->input_error = -1;
 		return error("cannot convert from %s to %s",
 			     charset, mi->metainfo_charset);
 	}
-	strbuf_attach(line, out, strlen(out), strlen(out));
+	strbuf_attach(line, out, out_len, out_len);
 	return 0;
 }
 
diff --git a/t/t4254-am-corrupt.sh b/t/t4254-am-corrupt.sh
index ddd35498db..1bbc37bc92 100755
--- a/t/t4254-am-corrupt.sh
+++ b/t/t4254-am-corrupt.sh
@@ -3,6 +3,37 @@ 
 test_description='git am with corrupt input'
 . ./test-lib.sh
 
+make_mbox_with_nul () {
+	space=' '
+	q_nul_in_subject=
+	q_nul_in_body=
+	while test $# -ne 0
+	do
+		case "$1" in
+		subject) q_nul_in_subject='=00' ;;
+		body)    q_nul_in_body='=00' ;;
+		esac &&
+		shift
+	done &&
+	cat <<-EOF
+	From ec7364544f690c560304f5a5de9428ea3b978b26 Mon Sep 17 00:00:00 2001
+	From: A U Thor <author@example.com>
+	Date: Sun, 19 Apr 2020 13:42:07 +0700
+	Subject: [PATCH] =?ISO-8859-1?q?=C4=CB${q_nul_in_subject}=D1=CF=D6?=
+	MIME-Version: 1.0
+	Content-Type: text/plain; charset=ISO-8859-1
+	Content-Transfer-Encoding: quoted-printable
+
+	abc${q_nul_in_body}def
+	---
+	diff --git a/afile b/afile
+	new file mode 100644
+	index 0000000000..e69de29bb2
+	--$space
+	2.26.1
+	EOF
+}
+
 test_expect_success setup '
 	# Note the missing "+++" line:
 	cat >bad-patch.diff <<-\EOF &&
@@ -32,4 +63,17 @@  test_expect_success 'try to apply corrupted patch' '
 	test_i18ncmp expected actual
 '
 
+test_expect_success "NUL in commit message's body" '
+	test_when_finished "git am --abort" &&
+	make_mbox_with_nul body >body.patch &&
+	test_must_fail git am body.patch 2>err &&
+	grep "a NUL byte in commit log message not allowed" err
+'
+
+test_expect_failure "NUL in commit message's header" "
+	test_when_finished 'git am --abort' &&
+	make_mbox_with_nul subject >subject.patch &&
+	test_must_fail git am subject.patch
+"
+
 test_done