diff mbox series

[v2,3/6] parse-options: factor out register_abbrev() and struct parsed_option

Message ID 20240303121944.20627-4-l.s.r@web.de (mailing list archive)
State New
Headers show
Series parse-options: long option parsing fixes and cleanup | expand

Commit Message

René Scharfe March 3, 2024, 12:19 p.m. UTC
Add a function, register_abbrev(), for storing the necessary details for
remembering an abbreviated and thus potentially ambiguous option.  Call
it instead of sharing the code using goto, to make the control flow more
explicit.

Conveniently collect these details in the new struct parsed_option to
reduce the number of necessary function arguments.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 parse-options.c | 83 +++++++++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 34 deletions(-)

--
2.44.0

Comments

Josh Steadmon March 12, 2024, 9:43 p.m. UTC | #1
I found this change to be hard to follow, although I'm not sure anything
actually needs to be changed. Thinking aloud below, apologies for being
verbose.

On 2024.03.03 13:19, René Scharfe wrote:
> Add a function, register_abbrev(), for storing the necessary details for
> remembering an abbreviated and thus potentially ambiguous option.  Call
> it instead of sharing the code using goto, to make the control flow more
> explicit.
> 
> Conveniently collect these details in the new struct parsed_option to
> reduce the number of necessary function arguments.
> 
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
>  parse-options.c | 83 +++++++++++++++++++++++++++++--------------------
>  1 file changed, 49 insertions(+), 34 deletions(-)
> 
> diff --git a/parse-options.c b/parse-options.c
> index 056c6b30e9..398ebaef14 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -350,14 +350,40 @@ static int is_alias(struct parse_opt_ctx_t *ctx,
>  	return 0;
>  }
> 
> +struct parsed_option {
> +	const struct option *option;
> +	enum opt_parsed flags;
> +};

We're bundling up the state for previously-examined options that "arg"
might expand to. Looks good.


> +static void register_abbrev(struct parse_opt_ctx_t *p,
> +			    const struct option *option, enum opt_parsed flags,
> +			    struct parsed_option *abbrev,
> +			    struct parsed_option *ambiguous)
> +{

Here we're defining a function to separate out the logic that we
previously jumped to with "goto is_abbreviated;". Looks good.


> +	if (p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT)
> +		return;

This is the (negation of the) allow_abbrev condition that was removed
below. Looks good.

> +	if (abbrev->option &&
> +	    !is_alias(p, abbrev->option, option)) {
> +		/*
> +		 * If this is abbreviated, it is
> +		 * ambiguous. So when there is no
> +		 * exact match later, we need to
> +		 * error out.
> +		 */
> +		ambiguous->option = abbrev->option;
> +		ambiguous->flags = abbrev->flags;

Couldn't this just be "*ambiguous = *abbrev;" ?


> +	}
> +	abbrev->option = option;
> +	abbrev->flags = flags;
> +}

So, we've found a candidate that matches our abbreviation. If this is
the second (or later) candidate, then we've got an ambiguous
abbreviation, which we'll need to warn about later. Since we're
overwriting both "ambiguous" and "abbrev", we'll only refer to the last
two candidates seen, but that seems fine.

>  static enum parse_opt_result parse_long_opt(
>  	struct parse_opt_ctx_t *p, const char *arg,
>  	const struct option *options)
>  {
>  	const char *arg_end = strchrnul(arg, '=');
> -	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
> -	enum opt_parsed abbrev_flags = OPT_LONG, ambiguous_flags = OPT_LONG;
> -	int allow_abbrev = !(p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT);
> +	struct parsed_option abbrev = { .option = NULL, .flags = OPT_LONG };
> +	struct parsed_option ambiguous = { .option = NULL, .flags = OPT_LONG };
> 
>  	for (; options->type != OPTION_END; options++) {
>  		const char *rest, *long_name = options->long_name;
> @@ -377,31 +403,20 @@ static enum parse_opt_result parse_long_opt(
>  			rest = NULL;
>  		if (!rest) {
>  			/* abbreviated? */
> -			if (allow_abbrev &&
> -			    !strncmp(long_name, arg, arg_end - arg)) {
> -is_abbreviated:
> -				if (abbrev_option &&
> -				    !is_alias(p, abbrev_option, options)) {
> -					/*
> -					 * If this is abbreviated, it is
> -					 * ambiguous. So when there is no
> -					 * exact match later, we need to
> -					 * error out.
> -					 */
> -					ambiguous_option = abbrev_option;
> -					ambiguous_flags = abbrev_flags;
> -				}
> -				abbrev_option = options;
> -				abbrev_flags = flags ^ opt_flags;
> +			if (!strncmp(long_name, arg, arg_end - arg)) {
> +				register_abbrev(p, options, flags ^ opt_flags,
> +						&abbrev, &ambiguous);

This part I found hard to follow; the change itself is a simple
replacement of the original logic with our new function, but I don't
understand the original logic in the first place. Why do we XOR flags
and opt_flags here?

These are both bitflags which can hold a combination of OPT_SHORT and/or
OPT_UNSET (i.e., we've passed --no-foo). Since we're only looking at
args that we already know are in long form, the only one we should need
to worry about is whether OPT_UNSET is true or false. And indeed, we
never set OPT_SHORT in this function.

IIUC, "flags" corresponds to "arg", the flag we're trying to parse, and
"opt_flags" corresponds to "options", the current item in the list of
all options that we're trying to match against.

So OPT_UNSET will be true in "flags" if either "arg" starts with "no-"
or if it is a prefix of "no-".

OPT_UNSET will be true in "opt_flags" if all of the following are true:
* "arg" does not start with "no-"
* PARSE_OPT_NONEG is not set in options->flags
* options->long_name starts with "no-"

So OPT_UNSET can never be set at the same time in both "flags" and
"opt_flags", and thus the XOR makes a bit more sense: either the
argument we're parsing or the canonical form of the candidate that we're
matching against is negated. We don't care which one, but we need to
know about the negation later, to either set the value properly, or to
report potential ambiguities with the "no-" prefix.

>  				continue;
>  			}
>  			/* negation allowed? */
>  			if (options->flags & PARSE_OPT_NONEG)
>  				continue;
>  			/* negated and abbreviated very much? */
> -			if (allow_abbrev && starts_with("no-", arg)) {
> +			if (starts_with("no-", arg)) {
>  				flags |= OPT_UNSET;
> -				goto is_abbreviated;
> +				register_abbrev(p, options, flags ^ opt_flags,
> +						&abbrev, &ambiguous);
> +				continue;

This is a slight change: we might set OPT_UNSET in flags where before we
were prevented by the "allow_abbrev" condition, but register_abbrev will
still be a no-op in that case, so I don't think this changes any
behavior.

All the other changes in this patch are straightforward. So, despite
having to puzzle out the original logic, everything here looks good.
Junio C Hamano March 13, 2024, 5:48 p.m. UTC | #2
Josh Steadmon <steadmon@google.com> writes:

> I found this change to be hard to follow, although I'm not sure anything
> actually needs to be changed. Thinking aloud below, apologies for being
> verbose.

Thanks for carefully following the code.  It unfortunately has to
get long, but this is the kind of review that I would appreciate
most if I were the author, pointing out what is easy to understand
and more importantly what is harder to follow.
René Scharfe March 13, 2024, 6:47 p.m. UTC | #3
Am 12.03.24 um 22:43 schrieb Josh Steadmon:
> I found this change to be hard to follow, although I'm not sure anything
> actually needs to be changed. Thinking aloud below, apologies for being
> verbose.

Thank you for taking the time to review and share!

> On 2024.03.03 13:19, René Scharfe wrote:
>> Add a function, register_abbrev(), for storing the necessary details for
>> remembering an abbreviated and thus potentially ambiguous option.  Call
>> it instead of sharing the code using goto, to make the control flow more
>> explicit.
>>
>> Conveniently collect these details in the new struct parsed_option to
>> reduce the number of necessary function arguments.
>>
>> Signed-off-by: René Scharfe <l.s.r@web.de>
>> ---
>>  parse-options.c | 83 +++++++++++++++++++++++++++++--------------------
>>  1 file changed, 49 insertions(+), 34 deletions(-)
>>
>> diff --git a/parse-options.c b/parse-options.c
>> index 056c6b30e9..398ebaef14 100644
>> --- a/parse-options.c
>> +++ b/parse-options.c
>> @@ -350,14 +350,40 @@ static int is_alias(struct parse_opt_ctx_t *ctx,
>>  	return 0;
>>  }
>>
>> +struct parsed_option {
>> +	const struct option *option;
>> +	enum opt_parsed flags;
>> +};
>
> We're bundling up the state for previously-examined options that "arg"
> might expand to. Looks good.
>
>
>> +static void register_abbrev(struct parse_opt_ctx_t *p,
>> +			    const struct option *option, enum opt_parsed flags,
>> +			    struct parsed_option *abbrev,
>> +			    struct parsed_option *ambiguous)
>> +{
>
> Here we're defining a function to separate out the logic that we
> previously jumped to with "goto is_abbreviated;". Looks good.
>
>
>> +	if (p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT)
>> +		return;
>
> This is the (negation of the) allow_abbrev condition that was removed
> below. Looks good.
>
>> +	if (abbrev->option &&
>> +	    !is_alias(p, abbrev->option, option)) {
>> +		/*
>> +		 * If this is abbreviated, it is
>> +		 * ambiguous. So when there is no
>> +		 * exact match later, we need to
>> +		 * error out.
>> +		 */
>> +		ambiguous->option = abbrev->option;
>> +		ambiguous->flags = abbrev->flags;
>
> Couldn't this just be "*ambiguous = *abbrev;" ?

It could, but I wanted to keep it as close to the original as possible.
I was hoping the changes made here would be mechanical enough to be
reviewable without requiring a deeper understanding of what the code
actually does, but that didn't quite work out it seems.  Glad you made
it through, though! :)

>
>
>> +	}
>> +	abbrev->option = option;
>> +	abbrev->flags = flags;
>> +}
>
> So, we've found a candidate that matches our abbreviation. If this is
> the second (or later) candidate, then we've got an ambiguous
> abbreviation, which we'll need to warn about later. Since we're
> overwriting both "ambiguous" and "abbrev", we'll only refer to the last
> two candidates seen, but that seems fine.
>
>>  static enum parse_opt_result parse_long_opt(
>>  	struct parse_opt_ctx_t *p, const char *arg,
>>  	const struct option *options)
>>  {
>>  	const char *arg_end = strchrnul(arg, '=');
>> -	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
>> -	enum opt_parsed abbrev_flags = OPT_LONG, ambiguous_flags = OPT_LONG;
>> -	int allow_abbrev = !(p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT);
>> +	struct parsed_option abbrev = { .option = NULL, .flags = OPT_LONG };
>> +	struct parsed_option ambiguous = { .option = NULL, .flags = OPT_LONG };
>>
>>  	for (; options->type != OPTION_END; options++) {
>>  		const char *rest, *long_name = options->long_name;
>> @@ -377,31 +403,20 @@ static enum parse_opt_result parse_long_opt(
>>  			rest = NULL;
>>  		if (!rest) {
>>  			/* abbreviated? */
>> -			if (allow_abbrev &&
>> -			    !strncmp(long_name, arg, arg_end - arg)) {
>> -is_abbreviated:
>> -				if (abbrev_option &&
>> -				    !is_alias(p, abbrev_option, options)) {
>> -					/*
>> -					 * If this is abbreviated, it is
>> -					 * ambiguous. So when there is no
>> -					 * exact match later, we need to
>> -					 * error out.
>> -					 */
>> -					ambiguous_option = abbrev_option;
>> -					ambiguous_flags = abbrev_flags;
>> -				}
>> -				abbrev_option = options;
>> -				abbrev_flags = flags ^ opt_flags;
>> +			if (!strncmp(long_name, arg, arg_end - arg)) {
>> +				register_abbrev(p, options, flags ^ opt_flags,
>> +						&abbrev, &ambiguous);
>
> This part I found hard to follow; the change itself is a simple
> replacement of the original logic with our new function, but I don't
> understand the original logic in the first place. Why do we XOR flags
> and opt_flags here?
>
> These are both bitflags which can hold a combination of OPT_SHORT and/or
> OPT_UNSET (i.e., we've passed --no-foo). Since we're only looking at
> args that we already know are in long form, the only one we should need
> to worry about is whether OPT_UNSET is true or false. And indeed, we
> never set OPT_SHORT in this function.

Right.  We do set OPT_LONG as well, but its value is 0, so that's just
for show.

> IIUC, "flags" corresponds to "arg", the flag we're trying to parse, and
> "opt_flags" corresponds to "options", the current item in the list of
> all options that we're trying to match against.
>
> So OPT_UNSET will be true in "flags" if either "arg" starts with "no-"
> or if it is a prefix of "no-".
>
> OPT_UNSET will be true in "opt_flags" if all of the following are true:
> * "arg" does not start with "no-"
> * PARSE_OPT_NONEG is not set in options->flags
> * options->long_name starts with "no-"

Right.

> So OPT_UNSET can never be set at the same time in both "flags" and
> "opt_flags",

It can if arg is "no" or "n" (the "negated and abbreviated very much?"
path), PARSE_OPT_NONEG is unset and long_name starts with "no-".

> and thus the XOR makes a bit more sense: either the
> argument we're parsing or the canonical form of the candidate that we're
> matching against is negated. We don't care which one, but we need to
> know about the negation later, to either set the value properly, or to
> report potential ambiguities with the "no-" prefix.

Right.  The flags track whether the user negated an option, but with
the twist that both arguments given by the user and long names in the
option definition can start with "no-":

arg        option       negated
---------- ------------ -------
--index     --index     no
--index     --no-index  yes
--no-index  --index     yes
--no-index  --no-index  no

"negated" means "the opposite meaning of the defined option" here,
not "presence of no- somewhere".  --index asks for the opposite
effect of what an option defined as --no-index would do.

If OPT_UNSET is set in both, the XOR cancels it out.  E.g. "--no"
directly matches "--no-index" as an abbreviation, without negating it.
(It also matches all other negatable options, so that's only usable
for commands that have a single one of them..)

>
>>  				continue;
>>  			}
>>  			/* negation allowed? */
>>  			if (options->flags & PARSE_OPT_NONEG)
>>  				continue;
>>  			/* negated and abbreviated very much? */
>> -			if (allow_abbrev && starts_with("no-", arg)) {
>> +			if (starts_with("no-", arg)) {
>>  				flags |= OPT_UNSET;
>> -				goto is_abbreviated;
>> +				register_abbrev(p, options, flags ^ opt_flags,
>> +						&abbrev, &ambiguous);
>> +				continue;
>
> This is a slight change: we might set OPT_UNSET in flags where before we
> were prevented by the "allow_abbrev" condition, but register_abbrev will
> still be a no-op in that case, so I don't think this changes any
> behavior.

Indeed.  The continue statement starts the loop over and initializes
flags anew, and the allow_abbrev condition is checked at the top of
register_abbrev(), as you noted.

> All the other changes in this patch are straightforward. So, despite
> having to puzzle out the original logic, everything here looks good.
diff mbox series

Patch

diff --git a/parse-options.c b/parse-options.c
index 056c6b30e9..398ebaef14 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -350,14 +350,40 @@  static int is_alias(struct parse_opt_ctx_t *ctx,
 	return 0;
 }

+struct parsed_option {
+	const struct option *option;
+	enum opt_parsed flags;
+};
+
+static void register_abbrev(struct parse_opt_ctx_t *p,
+			    const struct option *option, enum opt_parsed flags,
+			    struct parsed_option *abbrev,
+			    struct parsed_option *ambiguous)
+{
+	if (p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT)
+		return;
+	if (abbrev->option &&
+	    !is_alias(p, abbrev->option, option)) {
+		/*
+		 * If this is abbreviated, it is
+		 * ambiguous. So when there is no
+		 * exact match later, we need to
+		 * error out.
+		 */
+		ambiguous->option = abbrev->option;
+		ambiguous->flags = abbrev->flags;
+	}
+	abbrev->option = option;
+	abbrev->flags = flags;
+}
+
 static enum parse_opt_result parse_long_opt(
 	struct parse_opt_ctx_t *p, const char *arg,
 	const struct option *options)
 {
 	const char *arg_end = strchrnul(arg, '=');
-	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
-	enum opt_parsed abbrev_flags = OPT_LONG, ambiguous_flags = OPT_LONG;
-	int allow_abbrev = !(p->flags & PARSE_OPT_KEEP_UNKNOWN_OPT);
+	struct parsed_option abbrev = { .option = NULL, .flags = OPT_LONG };
+	struct parsed_option ambiguous = { .option = NULL, .flags = OPT_LONG };

 	for (; options->type != OPTION_END; options++) {
 		const char *rest, *long_name = options->long_name;
@@ -377,31 +403,20 @@  static enum parse_opt_result parse_long_opt(
 			rest = NULL;
 		if (!rest) {
 			/* abbreviated? */
-			if (allow_abbrev &&
-			    !strncmp(long_name, arg, arg_end - arg)) {
-is_abbreviated:
-				if (abbrev_option &&
-				    !is_alias(p, abbrev_option, options)) {
-					/*
-					 * If this is abbreviated, it is
-					 * ambiguous. So when there is no
-					 * exact match later, we need to
-					 * error out.
-					 */
-					ambiguous_option = abbrev_option;
-					ambiguous_flags = abbrev_flags;
-				}
-				abbrev_option = options;
-				abbrev_flags = flags ^ opt_flags;
+			if (!strncmp(long_name, arg, arg_end - arg)) {
+				register_abbrev(p, options, flags ^ opt_flags,
+						&abbrev, &ambiguous);
 				continue;
 			}
 			/* negation allowed? */
 			if (options->flags & PARSE_OPT_NONEG)
 				continue;
 			/* negated and abbreviated very much? */
-			if (allow_abbrev && starts_with("no-", arg)) {
+			if (starts_with("no-", arg)) {
 				flags |= OPT_UNSET;
-				goto is_abbreviated;
+				register_abbrev(p, options, flags ^ opt_flags,
+						&abbrev, &ambiguous);
+				continue;
 			}
 			/* negated? */
 			if (!starts_with(arg, "no-"))
@@ -409,12 +424,12 @@  static enum parse_opt_result parse_long_opt(
 			flags |= OPT_UNSET;
 			if (!skip_prefix(arg + 3, long_name, &rest)) {
 				/* abbreviated and negated? */
-				if (allow_abbrev &&
-				    !strncmp(long_name, arg + 3,
+				if (!strncmp(long_name, arg + 3,
 					     arg_end - arg - 3))
-					goto is_abbreviated;
-				else
-					continue;
+					register_abbrev(p, options,
+							flags ^ opt_flags,
+							&abbrev, &ambiguous);
+				continue;
 			}
 		}
 		if (*rest) {
@@ -425,24 +440,24 @@  static enum parse_opt_result parse_long_opt(
 		return get_value(p, options, flags ^ opt_flags);
 	}

-	if (disallow_abbreviated_options && (ambiguous_option || abbrev_option))
+	if (disallow_abbreviated_options && (ambiguous.option || abbrev.option))
 		die("disallowed abbreviated or ambiguous option '%.*s'",
 		    (int)(arg_end - arg), arg);

-	if (ambiguous_option) {
+	if (ambiguous.option) {
 		error(_("ambiguous option: %s "
 			"(could be --%s%s or --%s%s)"),
 			arg,
-			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
-			ambiguous_option->long_name,
-			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
-			abbrev_option->long_name);
+			(ambiguous.flags & OPT_UNSET) ?  "no-" : "",
+			ambiguous.option->long_name,
+			(abbrev.flags & OPT_UNSET) ?  "no-" : "",
+			abbrev.option->long_name);
 		return PARSE_OPT_HELP;
 	}
-	if (abbrev_option) {
+	if (abbrev.option) {
 		if (*arg_end)
 			p->opt = arg_end + 1;
-		return get_value(p, abbrev_option, abbrev_flags);
+		return get_value(p, abbrev.option, abbrev.flags);
 	}
 	return PARSE_OPT_UNKNOWN;
 }