diff mbox series

[v2,3/5] mailinfo: skip quoted CR on user's wish

Message ID 3215ea95cf869c8495d95cfd774973c330c14d1d.1620148732.git.congdanhqx@gmail.com (mailing list archive)
State Superseded
Headers show
Series Teach am/mailinfo to process quoted CR | expand

Commit Message

Đoàn Trần Công Danh May 4, 2021, 5:20 p.m. UTC
In previous change, we've turned on warning for quoted CR in base64
encoded email. Despite those warnings are usually helpful for our users,
they may expect quoted CR in their emails.

Let's give them an option to turn off the warning completely.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Documentation/git-mailinfo.txt | 18 +++++++++++++++++-
 builtin/mailinfo.c             |  8 ++++++--
 mailinfo.c                     | 21 ++++++++++++++++++++-
 mailinfo.h                     |  8 ++++++++
 t/t5100-mailinfo.sh            |  6 ++++--
 5 files changed, 55 insertions(+), 6 deletions(-)

Comments

Junio C Hamano May 5, 2021, 4:12 a.m. UTC | #1
Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> Subject: Re: [PATCH v2 3/5] mailinfo: skip quoted CR on user's wish

Nothing wrong per-se, but "on user's wish" feel somewhat bizarre.
Perhaps

    mailinfo: allow skipping quoted CR

or something along that line?

> In previous change, we've turned on warning for quoted CR in base64
> encoded email. Despite those warnings are usually helpful for our users,
> they may expect quoted CR in their emails.
>
> Let's give them an option to turn off the warning completely.
>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>  Documentation/git-mailinfo.txt | 18 +++++++++++++++++-
>  builtin/mailinfo.c             |  8 ++++++--
>  mailinfo.c                     | 21 ++++++++++++++++++++-
>  mailinfo.h                     |  8 ++++++++
>  t/t5100-mailinfo.sh            |  6 ++++--
>  5 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
> index d343f040f5..c776b27515 100644
> --- a/Documentation/git-mailinfo.txt
> +++ b/Documentation/git-mailinfo.txt
> @@ -9,7 +9,7 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
>  SYNOPSIS
>  --------
>  [verse]
> -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
> +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>

This line is getting really crowded.  Perhaps it is time to do

	'git mailinfo' [<options>] <msg> <patch>

like other Git subcommands with too many options?  Certainly it can
be done after the dust settles from this entire series as a follow up
clean-up patch.

> @@ -89,6 +89,22 @@ This can be enabled by default with the configuration option mailinfo.scissors.
>  --no-scissors::
>  	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
>  
> +--quoted-cr=<action>::
> +	Action when processes email messages sent with base64 or
> +	quoted-printable encoding, and the decoded lines end with CR-LF

s/with CR-LF/with a CRLF/

> +	instead of a simple LF.
> ++
> +The valid actions are:
> ++
> +--
> +*	`nowarn`: Git will do nothing with this action.

s/with this action./when such a CRLF is seen./ perhaps?

> +*	`warn`: Git will issue a warning for each message if such CR-LF is

s/such CR-LF/such a CRLF/

> diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
> index b309badce5..1d600263cb 100644
> --- a/builtin/mailinfo.c
> +++ b/builtin/mailinfo.c
> @@ -9,7 +9,7 @@
>  #include "mailinfo.h"
>  
>  static const char mailinfo_usage[] =
> -	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
> +	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";

It is surprising that we haven't switched this to parse_options().
It of course is outside the scope of this series, but from a cursory
look of its option parsing loop, it looks like a trivial improvement
to make.

> @@ -43,7 +43,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
>  			mi.use_scissors = 0;
>  		else if (!strcmp(argv[1], "--no-inbody-headers"))
>  			mi.use_inbody_headers = 0;
> -		else
> +		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
> +			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
> +			if (mi.quoted_cr == quoted_cr_invalid_action)
> +				usage(mailinfo_usage);

This is not all that helpful, given that mailinfo_usage[] only says
<action> without saying what the supported values are, and the
message does not make it clear it was issued while looking at the
--quoted-cr option.

At least, something like

			if (mi.quoted_cr == quoted_cr_invalid_action)
                        	die("--quoted-cr=%s: invalid action", str);

would be more palatable, but I wonder if mailinfo_parse_quoted_cr_action()
should have an option to die with the list of actions it knows about
in a message.

> diff --git a/mailinfo.c b/mailinfo.c
> index 713567f84b..fe7ffd01d0 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
>  
>  static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
>  {
> -	if (have_quoted_cr)
> +	if (have_quoted_cr
> +	    && mi->quoted_cr == quoted_cr_warn)

Existing code in this file prefers to split a multi-line statement
after sequence point like &&, ||, etc., not before.

>  		warning("quoted CR detected");
>  }
>  
> @@ -1221,9 +1222,19 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
>  	return mi->input_error;
>  }
>  
> +enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
> +{
> +	if (!strcmp(action, "nowarn"))
> +		return quoted_cr_nowarn;
> +	else if (!strcmp(action, "warn"))
> +		return quoted_cr_warn;
> +	return quoted_cr_invalid_action;
> +}

OK.

>  static int git_mailinfo_config(const char *var, const char *value, void *mi_)
>  {
>  	struct mailinfo *mi = mi_;
> +	const char *str;
>  
>  	if (!starts_with(var, "mailinfo."))
>  		return git_default_config(var, value, NULL);
> @@ -1231,6 +1242,13 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_)
>  		mi->use_scissors = git_config_bool(var, value);
>  		return 0;
>  	}
> +	if (!strcmp(var, "mailinfo.quotedcr")) {
> +		git_config_string(&str, var, value);
> +		mi->quoted_cr = mailinfo_parse_quoted_cr_action(str);
> +		if (mi->quoted_cr == quoted_cr_invalid_action)
> +			die(_("bad action '%s' for '%s'"), str, var);
> +		free((void *)str);
> +	}

Here, it is more reasonable.  It still does not say what actions are
accepted, but at least the user learns where our displeasure comes
from.

>  	/* perhaps others here */
>  	return 0;
>  }
Đoàn Trần Công Danh May 5, 2021, 3:53 p.m. UTC | #2
On 2021-05-05 13:12:12+0900, Junio C Hamano <gitster@pobox.com> wrote:
> > diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
> > index d343f040f5..c776b27515 100644
> > --- a/Documentation/git-mailinfo.txt
> > +++ b/Documentation/git-mailinfo.txt
> > @@ -9,7 +9,7 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
> >  SYNOPSIS
> >  --------
> >  [verse]
> > -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
> > +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>
> 
> This line is getting really crowded.  Perhaps it is time to do
> 
> 	'git mailinfo' [<options>] <msg> <patch>
> 
> like other Git subcommands with too many options?  Certainly it can
> be done after the dust settles from this entire series as a follow up
> clean-up patch.

Yes, I think it's time to do that clean-up.

> >  static const char mailinfo_usage[] =
> > -	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
> > +	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";
> 
> It is surprising that we haven't switched this to parse_options().
> It of course is outside the scope of this series, but from a cursory
> look of its option parsing loop, it looks like a trivial improvement
> to make.

And given that we also need 1/5 (otherwise, we need a new declaration
for "const char *str"), I think it would be better to turn 1/5 to the
conversion to parse_option.

> > @@ -43,7 +43,11 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
> >  			mi.use_scissors = 0;
> >  		else if (!strcmp(argv[1], "--no-inbody-headers"))
> >  			mi.use_inbody_headers = 0;
> > -		else
> > +		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
> > +			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
> > +			if (mi.quoted_cr == quoted_cr_invalid_action)
> > +				usage(mailinfo_usage);
> 
> This is not all that helpful, given that mailinfo_usage[] only says
> <action> without saying what the supported values are, and the
> message does not make it clear it was issued while looking at the
> --quoted-cr option.
> 
> At least, something like
> 
> 			if (mi.quoted_cr == quoted_cr_invalid_action)
>                         	die("--quoted-cr=%s: invalid action", str);
> 
> would be more palatable, but I wonder if mailinfo_parse_quoted_cr_action()
> should have an option to die with the list of actions it knows about
> in a message.

I tempted to remove the _invalid_action with the re-roll and always
die when it doesn't understand the actions instead.
Let's see how far I can get with that approach.
diff mbox series

Patch

diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index d343f040f5..c776b27515 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -9,7 +9,7 @@  git-mailinfo - Extracts patch and authorship from a single e-mail message
 SYNOPSIS
 --------
 [verse]
-'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] [--quoted-cr=<action>] <msg> <patch>
 
 
 DESCRIPTION
@@ -89,6 +89,22 @@  This can be enabled by default with the configuration option mailinfo.scissors.
 --no-scissors::
 	Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
 
+--quoted-cr=<action>::
+	Action when processes email messages sent with base64 or
+	quoted-printable encoding, and the decoded lines end with CR-LF
+	instead of a simple LF.
++
+The valid actions are:
++
+--
+*	`nowarn`: Git will do nothing with this action.
+*	`warn`: Git will issue a warning for each message if such CR-LF is
+	found.
+--
++
+The default action could be set by configuration option `mailinfo.quotedCR`.
+If no such configuration option has been set, `warn` will be used.
+
 <msg>::
 	The commit log message extracted from e-mail, usually
 	except the title line which comes from e-mail Subject.
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index b309badce5..1d600263cb 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -9,7 +9,7 @@ 
 #include "mailinfo.h"
 
 static const char mailinfo_usage[] =
-	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] <msg> <patch> < mail >info";
+	"git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] [--quoted-cr=<action>] <msg> <patch> < mail >info";
 
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
@@ -43,7 +43,11 @@  int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			mi.use_scissors = 0;
 		else if (!strcmp(argv[1], "--no-inbody-headers"))
 			mi.use_inbody_headers = 0;
-		else
+		else if (skip_prefix(argv[1], "--quoted-cr=", &str)) {
+			mi.quoted_cr = mailinfo_parse_quoted_cr_action(str);
+			if (mi.quoted_cr == quoted_cr_invalid_action)
+				usage(mailinfo_usage);
+		} else
 			usage(mailinfo_usage);
 		argc--; argv++;
 	}
diff --git a/mailinfo.c b/mailinfo.c
index 713567f84b..fe7ffd01d0 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -1040,7 +1040,8 @@  static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 
 static void summarize_quoted_cr(struct mailinfo *mi, int have_quoted_cr)
 {
-	if (have_quoted_cr)
+	if (have_quoted_cr
+	    && mi->quoted_cr == quoted_cr_warn)
 		warning("quoted CR detected");
 }
 
@@ -1221,9 +1222,19 @@  int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
 	return mi->input_error;
 }
 
+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action)
+{
+	if (!strcmp(action, "nowarn"))
+		return quoted_cr_nowarn;
+	else if (!strcmp(action, "warn"))
+		return quoted_cr_warn;
+	return quoted_cr_invalid_action;
+}
+
 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 {
 	struct mailinfo *mi = mi_;
+	const char *str;
 
 	if (!starts_with(var, "mailinfo."))
 		return git_default_config(var, value, NULL);
@@ -1231,6 +1242,13 @@  static int git_mailinfo_config(const char *var, const char *value, void *mi_)
 		mi->use_scissors = git_config_bool(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "mailinfo.quotedcr")) {
+		git_config_string(&str, var, value);
+		mi->quoted_cr = mailinfo_parse_quoted_cr_action(str);
+		if (mi->quoted_cr == quoted_cr_invalid_action)
+			die(_("bad action '%s' for '%s'"), str, var);
+		free((void *)str);
+	}
 	/* perhaps others here */
 	return 0;
 }
@@ -1243,6 +1261,7 @@  void setup_mailinfo(struct mailinfo *mi)
 	strbuf_init(&mi->charset, 0);
 	strbuf_init(&mi->log_message, 0);
 	strbuf_init(&mi->inbody_header_accum, 0);
+	mi->quoted_cr = quoted_cr_warn;
 	mi->header_stage = 1;
 	mi->use_inbody_headers = 1;
 	mi->content_top = mi->content;
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..1bcef5a6f3 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -5,6 +5,12 @@ 
 
 #define MAX_BOUNDARIES 5
 
+enum quoted_cr_action {
+	quoted_cr_nowarn,
+	quoted_cr_warn,
+	quoted_cr_invalid_action
+};
+
 struct mailinfo {
 	FILE *input;
 	FILE *output;
@@ -14,6 +20,7 @@  struct mailinfo {
 	struct strbuf email;
 	int keep_subject;
 	int keep_non_patch_brackets_in_subject;
+	enum quoted_cr_action quoted_cr;
 	int add_message_id;
 	int use_scissors;
 	int use_inbody_headers;
@@ -39,6 +46,7 @@  struct mailinfo {
 	int input_error;
 };
 
+enum quoted_cr_action mailinfo_parse_quoted_cr_action(const char *action);
 void setup_mailinfo(struct mailinfo *);
 int mailinfo(struct mailinfo *, const char *msg, const char *patch);
 void clear_mailinfo(struct mailinfo *);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index d8fdda6bea..57b8fc8104 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -236,11 +236,13 @@  check_quoted_cr_mail() {
 	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
 }
 
-test_expect_success 'mailinfo warn CR in base64 encoded email' '
+test_expect_success 'mailinfo handle CR in base64 encoded email' '
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
 	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
 	check_quoted_cr_mail &&
-	grep "quoted CR detected" quoted-cr-err
+	grep "quoted CR detected" quoted-cr-err &&
+	check_quoted_cr_mail --quoted-cr=nowarn &&
+	test_must_be_empty quoted-cr-err
 '
 
 test_done