diff mbox series

[v2,1/2] diff: consolidate diff algorithm option parsing

Message ID 0c5e1fc6c2651e39bcefa27ee0976c9519671969.1676410819.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Teach diff to honor diff algorithms set through git attributes | expand

Commit Message

John Cai Feb. 14, 2023, 9:40 p.m. UTC
From: John Cai <johncai86@gmail.com>

The diff option parsing for --minimal, --patience, --histgoram can all
be consolidated into one function. This is a preparatory step for the
subsequent commit which teaches diff to keep track of whether or not a

Comments

Junio C Hamano Feb. 15, 2023, 2:38 a.m. UTC | #1
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> The diff option parsing for --minimal, --patience, --histgoram can all
> be consolidated into one function. This is a preparatory step for the
> subsequent commit which teaches diff to keep track of whether or not a
> diff algorithm has been set via the command line.

Everybody other than patience used to be just a bit-op but now
everybody is a callback?

> diff --git a/diff.c b/diff.c
> index 329eebf16a0..92a0eab942e 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -3437,6 +3437,22 @@ static int diff_filepair_is_phoney(struct diff_filespec *one,
>  	return !DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two);
>  }
>  
> +static int set_diff_algorithm(struct diff_options *opts,
> +			      const char *alg)
> +{
> +	long value = parse_algorithm_value(alg);
> +
> +	if (value < 0)
> +		return 1;
> +
> +	/* clear out previous settings */
> +	DIFF_XDL_CLR(opts, NEED_MINIMAL);
> +	opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
> +	opts->xdl_opts |= value;
> +
> +	return 0;
> +}

The above is a faithful copy of diff_opt_diff_algorithm(), except
that it returns 1 (not -1) on failure, which is unexpected in this
codebase, and should be corrected if this patch gets rerolled.

>  static void builtin_diff(const char *name_a,
>  			 const char *name_b,
>  			 struct diff_filespec *one,
> @@ -5107,17 +5123,40 @@ static int diff_opt_diff_algorithm(const struct option *opt,
>  				   const char *arg, int unset)
>  {
>  	struct diff_options *options = opt->value;
> -	long value = parse_algorithm_value(arg);
>  
>  	BUG_ON_OPT_NEG(unset);
> -	if (value < 0)
> +
> +	if (set_diff_algorithm(options, arg))
>  		return error(_("option diff-algorithm accepts \"myers\", "
>  			       "\"minimal\", \"patience\" and \"histogram\""));
>  
> -	/* clear out previous settings */
> -	DIFF_XDL_CLR(options, NEED_MINIMAL);
> -	options->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
> -	options->xdl_opts |= value;
> +	return 0;
> +}

This version of diff_opt_diff_algorithm() behaves identically from
the version before this patch, which is excellent.

> +static int diff_opt_diff_algorithm_no_arg(const struct option *opt,
> +				   const char *arg, int unset)
> +{
> +	struct diff_options *options = opt->value;
> +
> +	BUG_ON_OPT_NEG(unset);
> +	BUG_ON_OPT_ARG(arg);
> +
> +	if (!strcmp(opt->long_name, "patience")) {
> +		size_t i;
> +		/*
> +		 * Both --patience and --anchored use PATIENCE_DIFF
> +		 * internally, so remove any anchors previously
> +		 * specified.
> +		 */
> +		for (i = 0; i < options->anchors_nr; i++)
> +			free(options->anchors[i]);
> +		options->anchors_nr = 0;
> +	}
> +
> +	if (set_diff_algorithm(options, opt->long_name))
> +		BUG("available diff algorithms include \"myers\", "
> +			       "\"minimal\", \"patience\" and \"histogram\"");
> +
>  	return 0;
>  }

Calling this instead of diff_opt_patience() would make "--patience"
parsed identically as before without this patch, which is excellent.

> @@ -5562,9 +5581,10 @@ struct option *add_diff_options(const struct option *opts,
>  			    N_("prevent rename/copy detection if the number of rename/copy targets exceeds given limit")),
>  
>  		OPT_GROUP(N_("Diff algorithm options")),
> -		OPT_BIT(0, "minimal", &options->xdl_opts,
> -			N_("produce the smallest possible diff"),
> -			XDF_NEED_MINIMAL),
> +		OPT_CALLBACK_F(0, "minimal", options, NULL,
> +			       N_("produce the smallest possible diff"),
> +			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
> +			       diff_opt_diff_algorithm_no_arg),

I offhand cannot say that these two are equivalent, even though they
ought to be (otherwise this patch would break things).  The callback
seems to do much more than just a simple "flip the NEED_MINIMAL bit
on".

> -		OPT_BITOP(0, "histogram", &options->xdl_opts,
> -			  N_("generate diff using the \"histogram diff\" algorithm"),
> -			  XDF_HISTOGRAM_DIFF, XDF_DIFF_ALGORITHM_MASK),
> +			       diff_opt_diff_algorithm_no_arg),
> +		OPT_CALLBACK_F(0, "histogram", options, NULL,
> +			       N_("generate diff using the \"histogram diff\" algorithm"),
> +			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
> +			       diff_opt_diff_algorithm_no_arg),

Likewise.

By nature, patience (and anchored) needs to do much more than
everybody else, so it almost feels that it is OK (and preferable,
even) to leave it a special case to make the distinction stand out.
Consolidating everybody else who are much simpler to share the
more complex callback does not look like a good change to me, at
least at the first glance.

Thanks.
John Cai Feb. 15, 2023, 11:34 p.m. UTC | #2
Hi Junio,

On 14 Feb 2023, at 21:38, Junio C Hamano wrote:

> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> From: John Cai <johncai86@gmail.com>
>>
>> The diff option parsing for --minimal, --patience, --histgoram can all
>> be consolidated into one function. This is a preparatory step for the
>> subsequent commit which teaches diff to keep track of whether or not a
>> diff algorithm has been set via the command line.
>
> Everybody other than patience used to be just a bit-op but now
> everybody is a callback?
>
>> diff --git a/diff.c b/diff.c
>> index 329eebf16a0..92a0eab942e 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -3437,6 +3437,22 @@ static int diff_filepair_is_phoney(struct diff_filespec *one,
>>  	return !DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two);
>>  }
>>
>> +static int set_diff_algorithm(struct diff_options *opts,
>> +			      const char *alg)
>> +{
>> +	long value = parse_algorithm_value(alg);
>> +
>> +	if (value < 0)
>> +		return 1;
>> +
>> +	/* clear out previous settings */
>> +	DIFF_XDL_CLR(opts, NEED_MINIMAL);
>> +	opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
>> +	opts->xdl_opts |= value;
>> +
>> +	return 0;
>> +}
>
> The above is a faithful copy of diff_opt_diff_algorithm(), except
> that it returns 1 (not -1) on failure, which is unexpected in this
> codebase, and should be corrected if this patch gets rerolled.
>
>>  static void builtin_diff(const char *name_a,
>>  			 const char *name_b,
>>  			 struct diff_filespec *one,
>> @@ -5107,17 +5123,40 @@ static int diff_opt_diff_algorithm(const struct option *opt,
>>  				   const char *arg, int unset)
>>  {
>>  	struct diff_options *options = opt->value;
>> -	long value = parse_algorithm_value(arg);
>>
>>  	BUG_ON_OPT_NEG(unset);
>> -	if (value < 0)
>> +
>> +	if (set_diff_algorithm(options, arg))
>>  		return error(_("option diff-algorithm accepts \"myers\", "
>>  			       "\"minimal\", \"patience\" and \"histogram\""));
>>
>> -	/* clear out previous settings */
>> -	DIFF_XDL_CLR(options, NEED_MINIMAL);
>> -	options->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
>> -	options->xdl_opts |= value;
>> +	return 0;
>> +}
>
> This version of diff_opt_diff_algorithm() behaves identically from
> the version before this patch, which is excellent.
>
>> +static int diff_opt_diff_algorithm_no_arg(const struct option *opt,
>> +				   const char *arg, int unset)
>> +{
>> +	struct diff_options *options = opt->value;
>> +
>> +	BUG_ON_OPT_NEG(unset);
>> +	BUG_ON_OPT_ARG(arg);
>> +
>> +	if (!strcmp(opt->long_name, "patience")) {
>> +		size_t i;
>> +		/*
>> +		 * Both --patience and --anchored use PATIENCE_DIFF
>> +		 * internally, so remove any anchors previously
>> +		 * specified.
>> +		 */
>> +		for (i = 0; i < options->anchors_nr; i++)
>> +			free(options->anchors[i]);
>> +		options->anchors_nr = 0;
>> +	}
>> +
>> +	if (set_diff_algorithm(options, opt->long_name))
>> +		BUG("available diff algorithms include \"myers\", "
>> +			       "\"minimal\", \"patience\" and \"histogram\"");
>> +
>>  	return 0;
>>  }
>
> Calling this instead of diff_opt_patience() would make "--patience"
> parsed identically as before without this patch, which is excellent.
>
>> @@ -5562,9 +5581,10 @@ struct option *add_diff_options(const struct option *opts,
>>  			    N_("prevent rename/copy detection if the number of rename/copy targets exceeds given limit")),
>>
>>  		OPT_GROUP(N_("Diff algorithm options")),
>> -		OPT_BIT(0, "minimal", &options->xdl_opts,
>> -			N_("produce the smallest possible diff"),
>> -			XDF_NEED_MINIMAL),
>> +		OPT_CALLBACK_F(0, "minimal", options, NULL,
>> +			       N_("produce the smallest possible diff"),
>> +			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
>> +			       diff_opt_diff_algorithm_no_arg),
>
> I offhand cannot say that these two are equivalent, even though they
> ought to be (otherwise this patch would break things).  The callback
> seems to do much more than just a simple "flip the NEED_MINIMAL bit
> on".
>
>> -		OPT_BITOP(0, "histogram", &options->xdl_opts,
>> -			  N_("generate diff using the \"histogram diff\" algorithm"),
>> -			  XDF_HISTOGRAM_DIFF, XDF_DIFF_ALGORITHM_MASK),
>> +			       diff_opt_diff_algorithm_no_arg),
>> +		OPT_CALLBACK_F(0, "histogram", options, NULL,
>> +			       N_("generate diff using the \"histogram diff\" algorithm"),
>> +			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
>> +			       diff_opt_diff_algorithm_no_arg),
>
> Likewise.
>
> By nature, patience (and anchored) needs to do much more than
> everybody else, so it almost feels that it is OK (and preferable,
> even) to leave it a special case to make the distinction stand out.
> Consolidating everybody else who are much simpler to share the
> more complex callback does not look like a good change to me, at
> least at the first glance.

Yeah, it's not great to pull things out from a bit flip to a callback but I
needed some way of setting the xdl_opts_command_line member in the next commit
when we know that the diff algorithm was set via options on the command line so
that we can honor the precedence. If there's a different way to do that other
than using callbacks for command options parsing, I'd agree that keeping these
as bit flips would be ideal.

>
> Thanks.

Thanks!
John
Junio C Hamano Feb. 15, 2023, 11:42 p.m. UTC | #3
John Cai <johncai86@gmail.com> writes:

> Yeah, it's not great to pull things out from a bit flip to a callback but I
> needed some way of setting the xdl_opts_command_line member in the next commit

If that is the case (and after reading [2/2], of course, readers of
the series can tell that it was the reason), it would be good to be
honest and document _that_ as the reason why you are using callbacks
for all of them in the proposed log message for [1/2].

And I think that is a reasonable way to use callback to do "more
than just setting a bit".  Even in that case, I am not sure if it is
a good idea to share the same callback that has conditional code
that only is relevant to the "patience" case, though.

THanks.
Jeff King Feb. 16, 2023, 2:14 a.m. UTC | #4
On Wed, Feb 15, 2023 at 03:42:12PM -0800, Junio C Hamano wrote:

> And I think that is a reasonable way to use callback to do "more
> than just setting a bit".  Even in that case, I am not sure if it is
> a good idea to share the same callback that has conditional code
> that only is relevant to the "patience" case, though.

I have to admit I did a double-take at the string comparisons with
opt->long. I guess we can rely on it, since it is coming from the
options struct (and is not the string that the user wrote on the command
line!). But it still feels a little error-prone, just because there's no
help from the type system or the compiler.

I'd have expected it with individual callbacks like:

  int handle_patience(...)
  {
	do_patience_specific_stuff();
	do_shared_stuff(PATIENCE_DIFF);
  }

  int handle_histogram(...)
  {
	do_shared_stuff(HISTOGRAM_DIFF);
  }

and so on. That's a bit more verbose, but the call stack reflects the
flow we expect.

I can live with it either way, though.

-Peff
Junio C Hamano Feb. 16, 2023, 2:57 a.m. UTC | #5
Jeff King <peff@peff.net> writes:

> I'd have expected it with individual callbacks like:
>
>   int handle_patience(...)
>   {
> 	do_patience_specific_stuff();
> 	do_shared_stuff(PATIENCE_DIFF);
>   }
>
>   int handle_histogram(...)
>   {
> 	do_shared_stuff(HISTOGRAM_DIFF);
>   }
>
> and so on. That's a bit more verbose, but the call stack reflects the
> flow we expect.

Exactly.

Thanks for spelling it out.
John Cai Feb. 16, 2023, 8:34 p.m. UTC | #6
On 23/02/15 06:57PM, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
> 
> > I'd have expected it with individual callbacks like:
> >
> >   int handle_patience(...)
> >   {
> > 	do_patience_specific_stuff();
> > 	do_shared_stuff(PATIENCE_DIFF);
> >   }
> >
> >   int handle_histogram(...)
> >   {
> > 	do_shared_stuff(HISTOGRAM_DIFF);
> >   }
> >
> > and so on. That's a bit more verbose, but the call stack reflects the
> > flow we expect.
> 
> Exactly.
> 
> Thanks for spelling it out.

Thanks for this suggestion--makes sense to me. Will incorporate in the next
version.
diff mbox series

Patch

diff algorithm has been set via the command line.

Additionally, the logic that sets the diff algorithm in
diff_opt_diff_algorithm() can  be refactored into a helper that will
allow multiple callsites to set the diff algorithm.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 diff.c | 87 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/diff.c b/diff.c
index 329eebf16a0..92a0eab942e 100644
--- a/diff.c
+++ b/diff.c
@@ -3437,6 +3437,22 @@  static int diff_filepair_is_phoney(struct diff_filespec *one,
 	return !DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two);
 }
 
+static int set_diff_algorithm(struct diff_options *opts,
+			      const char *alg)
+{
+	long value = parse_algorithm_value(alg);
+
+	if (value < 0)
+		return 1;
+
+	/* clear out previous settings */
+	DIFF_XDL_CLR(opts, NEED_MINIMAL);
+	opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
+	opts->xdl_opts |= value;
+
+	return 0;
+}
+
 static void builtin_diff(const char *name_a,
 			 const char *name_b,
 			 struct diff_filespec *one,
@@ -5107,17 +5123,40 @@  static int diff_opt_diff_algorithm(const struct option *opt,
 				   const char *arg, int unset)
 {
 	struct diff_options *options = opt->value;
-	long value = parse_algorithm_value(arg);
 
 	BUG_ON_OPT_NEG(unset);
-	if (value < 0)
+
+	if (set_diff_algorithm(options, arg))
 		return error(_("option diff-algorithm accepts \"myers\", "
 			       "\"minimal\", \"patience\" and \"histogram\""));
 
-	/* clear out previous settings */
-	DIFF_XDL_CLR(options, NEED_MINIMAL);
-	options->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
-	options->xdl_opts |= value;
+	return 0;
+}
+
+static int diff_opt_diff_algorithm_no_arg(const struct option *opt,
+				   const char *arg, int unset)
+{
+	struct diff_options *options = opt->value;
+
+	BUG_ON_OPT_NEG(unset);
+	BUG_ON_OPT_ARG(arg);
+
+	if (!strcmp(opt->long_name, "patience")) {
+		size_t i;
+		/*
+		 * Both --patience and --anchored use PATIENCE_DIFF
+		 * internally, so remove any anchors previously
+		 * specified.
+		 */
+		for (i = 0; i < options->anchors_nr; i++)
+			free(options->anchors[i]);
+		options->anchors_nr = 0;
+	}
+
+	if (set_diff_algorithm(options, opt->long_name))
+		BUG("available diff algorithms include \"myers\", "
+			       "\"minimal\", \"patience\" and \"histogram\"");
+
 	return 0;
 }
 
@@ -5242,26 +5281,6 @@  static enum parse_opt_result diff_opt_output(struct parse_opt_ctx_t *ctx,
 	return 0;
 }
 
-static int diff_opt_patience(const struct option *opt,
-			     const char *arg, int unset)
-{
-	struct diff_options *options = opt->value;
-	int i;
-
-	BUG_ON_OPT_NEG(unset);
-	BUG_ON_OPT_ARG(arg);
-	options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
-	/*
-	 * Both --patience and --anchored use PATIENCE_DIFF
-	 * internally, so remove any anchors previously
-	 * specified.
-	 */
-	for (i = 0; i < options->anchors_nr; i++)
-		free(options->anchors[i]);
-	options->anchors_nr = 0;
-	return 0;
-}
-
 static int diff_opt_ignore_regex(const struct option *opt,
 				 const char *arg, int unset)
 {
@@ -5562,9 +5581,10 @@  struct option *add_diff_options(const struct option *opts,
 			    N_("prevent rename/copy detection if the number of rename/copy targets exceeds given limit")),
 
 		OPT_GROUP(N_("Diff algorithm options")),
-		OPT_BIT(0, "minimal", &options->xdl_opts,
-			N_("produce the smallest possible diff"),
-			XDF_NEED_MINIMAL),
+		OPT_CALLBACK_F(0, "minimal", options, NULL,
+			       N_("produce the smallest possible diff"),
+			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
+			       diff_opt_diff_algorithm_no_arg),
 		OPT_BIT_F('w', "ignore-all-space", &options->xdl_opts,
 			  N_("ignore whitespace when comparing lines"),
 			  XDF_IGNORE_WHITESPACE, PARSE_OPT_NONEG),
@@ -5589,10 +5609,11 @@  struct option *add_diff_options(const struct option *opts,
 		OPT_CALLBACK_F(0, "patience", options, NULL,
 			       N_("generate diff using the \"patience diff\" algorithm"),
 			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
-			       diff_opt_patience),
-		OPT_BITOP(0, "histogram", &options->xdl_opts,
-			  N_("generate diff using the \"histogram diff\" algorithm"),
-			  XDF_HISTOGRAM_DIFF, XDF_DIFF_ALGORITHM_MASK),
+			       diff_opt_diff_algorithm_no_arg),
+		OPT_CALLBACK_F(0, "histogram", options, NULL,
+			       N_("generate diff using the \"histogram diff\" algorithm"),
+			       PARSE_OPT_NONEG | PARSE_OPT_NOARG,
+			       diff_opt_diff_algorithm_no_arg),
 		OPT_CALLBACK_F(0, "diff-algorithm", options, N_("<algorithm>"),
 			       N_("choose a diff algorithm"),
 			       PARSE_OPT_NONEG, diff_opt_diff_algorithm),