diff mbox series

[02/10] parse-options.[ch]: consistently use "enum parse_opt_flags"

Message ID patch-02.10-289bb437eb5-20210928T130905Z-avarab@gmail.com (mailing list archive)
State Superseded
Headers show
Series fix bug, use existing enums | expand

Commit Message

Ævar Arnfjörð Bjarmason Sept. 28, 2021, 1:14 p.m. UTC
Use the "enum parse_opt_flags" instead of an "int flags" as arguments
to the various functions in parse-options.c. This will help to catch
cases where we're not handling cases in switch statements, and
generally make it obvious which "flags" we're referring to in this
case.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 parse-options.c | 13 ++++++++-----
 parse-options.h |  6 ++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

Comments

Junio C Hamano Sept. 29, 2021, 12:10 a.m. UTC | #1
Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> Use the "enum parse_opt_flags" instead of an "int flags" as arguments
> to the various functions in parse-options.c. This will help to catch
> cases where we're not handling cases in switch statements, and

I am not sure about most of this change, and the claim the second
sentence makes is certainly dubious.  Let's look at the first hunk:

> diff --git a/parse-options.c b/parse-options.c
> index 55c5821b08d..9dce8b7f8a8 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -481,7 +481,8 @@ static void parse_options_check(const struct option *opts)

 static void parse_options_start_1(struct parse_opt_ctx_t *ctx,
 				  int argc, const char **argv, const char *prefix,
-				  const struct option *options, int flags)
+				  const struct option *options,
+				  enum parse_opt_flags flags)
 {
 	ctx->argc = argc;
 	ctx->argv = argv;
 	if (!(flags & PARSE_OPT_ONE_SHOT)) {
 		ctx->argc--;
 		ctx->argv++;
 	}
 	ctx->total = ctx->argc;
 	ctx->out   = argv;
 	ctx->prefix = prefix;
 	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
 	ctx->flags = flags;
 	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
 	    (flags & PARSE_OPT_STOP_AT_NON_OPTION) &&
 	    !(flags & PARSE_OPT_ONE_SHOT))
 		BUG("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
 	if ((flags & PARSE_OPT_ONE_SHOT) &&
 	    (flags & PARSE_OPT_KEEP_ARGV0))
 		BUG("Can't keep argv0 if you don't have it");
 	parse_options_check(options);
 }
 
The "flags" parameter does not take a value that is an "enum" in the
usual "enumeration" sense at all.  Even though parse_opt_flags
defines 7 distinct "enum" values, each enumerated value is a small
unsigned integer with only single bit set, the caller can throw a
value that is not among these 7 by OR'ing them together.  We would
not sensibly do

	switch (flags) {
	case PARSE_OPT_KEEP_UNKNOWN:
		...

In general, I am not all that enthusiastic for such a(n) (ab)use of
"enum" for bit patterns, much less than "enumerate all possible
values to make sure compilers would help us catch missing logic".

The "parse_opt_result" enum is the "right" kind of enumeration that
I can stand behind fully.  The hunk related to that enum in this
patch is quite reasonable and takes advantage of the fact that the
enum is meant to be the enumeration of all possible values.

Compared to it, parse_opt_flags and parse_opt_option_flags, not
really.

If we wanted to really clean up the latter two, perhaps we should
define the bit (which *can* be made into a proper "here are all the
possible values" enumeration), like this:

    enum parse_opt_flags_bit {
	PARSE_OPT_KEEP_DASHDASH_BIT = 0,
        PARSE_OPT_STOP_AT_NON_OPTION_BIT,
	PARSE_OPT_KEEP_ARGV0_BIT,
	...
	PARSE_OPT_SHELL_EVAL_BIT,
    };

and then update the users to use (1U << PARSE_OPT_$FLAG$_BIT), or
drop the pretense that it is a good idea to use enum for bit pattern
and use the CPP macro, i.e.

    #define PARSE_OPT_KEEP_DASHDASH (1U<<0)
    #define PARSE_OPT_STOP_AT_NON_OPTION (1U<<1)
    ...
    #define PARSE_OPT_SHELL_EVAL (1U<<6)

to make it clear that we do not mean these are "enumeration of
possible values".

Thanks.
Ævar Arnfjörð Bjarmason Sept. 29, 2021, 8:53 a.m. UTC | #2
On Tue, Sep 28 2021, Junio C Hamano wrote:

> Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:
>
>> Use the "enum parse_opt_flags" instead of an "int flags" as arguments
>> to the various functions in parse-options.c. This will help to catch
>> cases where we're not handling cases in switch statements, and
>
> I am not sure about most of this change, and the claim the second
> sentence makes is certainly dubious.  Let's look at the first hunk:
>
>> diff --git a/parse-options.c b/parse-options.c
>> index 55c5821b08d..9dce8b7f8a8 100644
>> --- a/parse-options.c
>> +++ b/parse-options.c
>> @@ -481,7 +481,8 @@ static void parse_options_check(const struct option *opts)
>
>  static void parse_options_start_1(struct parse_opt_ctx_t *ctx,
>  				  int argc, const char **argv, const char *prefix,
> -				  const struct option *options, int flags)
> +				  const struct option *options,
> +				  enum parse_opt_flags flags)
>  {
>  	ctx->argc = argc;
>  	ctx->argv = argv;
>  	if (!(flags & PARSE_OPT_ONE_SHOT)) {
>  		ctx->argc--;
>  		ctx->argv++;
>  	}
>  	ctx->total = ctx->argc;
>  	ctx->out   = argv;
>  	ctx->prefix = prefix;
>  	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
>  	ctx->flags = flags;
>  	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
>  	    (flags & PARSE_OPT_STOP_AT_NON_OPTION) &&
>  	    !(flags & PARSE_OPT_ONE_SHOT))
>  		BUG("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
>  	if ((flags & PARSE_OPT_ONE_SHOT) &&
>  	    (flags & PARSE_OPT_KEEP_ARGV0))
>  		BUG("Can't keep argv0 if you don't have it");
>  	parse_options_check(options);
>  }
>  
> The "flags" parameter does not take a value that is an "enum" in the
> usual "enumeration" sense at all.  Even though parse_opt_flags
> defines 7 distinct "enum" values, each enumerated value is a small
> unsigned integer with only single bit set, the caller can throw a
> value that is not among these 7 by OR'ing them together.  We would
> not sensibly do
>
> 	switch (flags) {
> 	case PARSE_OPT_KEEP_UNKNOWN:
> 		...
>
> In general, I am not all that enthusiastic for such a(n) (ab)use of
> "enum" for bit patterns, much less than "enumerate all possible
> values to make sure compilers would help us catch missing logic".

I agree that it's not as nice as enums where the fields are mutually
exclusive, since those can be checked via "case" arms, and this is
"unchecked" bitfields.

So e.g. the bug I fixed in 01/10 would not be found by a compiler I have
access to (and I don't think one currently exists).

But I think this is a perfectly good use of enums, we use this
enums-as-bitfields pattern in various other places,
e.g. builtin/rebase.c's "flags", the "commit_graph_write_flags",
"expire_reflog_flags" & "todo_item_flags", just to name a few from some
quick grepping.

One advantage is that it integrates nicely with some wider C
tooling. E.g. before this series, starting "git stash show" under gdb
and inspecting flags:

    (gdb) p flags
    $1 = 9

And after:

    (gdb) p flags
    $1 = (PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_UNKNOWN)

So the type information and bitfield-ness are retained.

Although you might argue that it leads you into a trap, as adding:

    flags |= PARSE_OPT_LASTARG_DEFAULT;

Will result in:

    (gdb) p flags
    $2 = (PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_UNKNOWN | PARSE_OPT_NO_INTERNAL_HELP)

I.e. it decodes the enum based on the int value & its known labels, and
it just so happens that PARSE_OPT_LASTARG_DEFAULT has the same value as
PARSE_OPT_NO_INTERNAL_HELP in an unrelated enum.

> The "parse_opt_result" enum is the "right" kind of enumeration that
> I can stand behind fully.  The hunk related to that enum in this
> patch is quite reasonable and takes advantage of the fact that the
> enum is meant to be the enumeration of all possible values.
>
> Compared to it, parse_opt_flags and parse_opt_option_flags, not
> really.
>
> If we wanted to really clean up the latter two, perhaps we should
> define the bit (which *can* be made into a proper "here are all the
> possible values" enumeration), like this:
>
>     enum parse_opt_flags_bit {
> 	PARSE_OPT_KEEP_DASHDASH_BIT = 0,
>         PARSE_OPT_STOP_AT_NON_OPTION_BIT,
> 	PARSE_OPT_KEEP_ARGV0_BIT,
> 	...
> 	PARSE_OPT_SHELL_EVAL_BIT,
>     };
>
> and then update the users to use (1U << PARSE_OPT_$FLAG$_BIT), or
> drop the pretense that it is a good idea to use enum for bit pattern
> and use the CPP macro, i.e.
>
>     #define PARSE_OPT_KEEP_DASHDASH (1U<<0)
>     #define PARSE_OPT_STOP_AT_NON_OPTION (1U<<1)
>     ...
>     #define PARSE_OPT_SHELL_EVAL (1U<<6)
>
> to make it clear that we do not mean these are "enumeration of
> possible values".

I'm not sure what the former suggestion here buys us, but the latter
will drop the type information as noted above, i.e. you'll get a:

    (gdb) p flags
    $1 = 9
Junio C Hamano Sept. 29, 2021, 3:09 p.m. UTC | #3
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Tue, Sep 28 2021, Junio C Hamano wrote:
>
>> Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:
>>
>>> Use the "enum parse_opt_flags" instead of an "int flags" as arguments
>>> to the various functions in parse-options.c. This will help to catch
>>> cases where we're not handling cases in switch statements, and

 ...

> But I think this is a perfectly good use of enums, we use this
> enums-as-bitfields pattern in various other places,
> e.g. builtin/rebase.c's "flags", the "commit_graph_write_flags",
> "expire_reflog_flags" & "todo_item_flags", just to name a few from some
> quick grepping.

Many codepaths already misusing enums is not an excuse to add another ;-)

But ...

> One advantage is that it integrates nicely with some wider C
> tooling. E.g. before this series, starting "git stash show" under gdb
> and inspecting flags:
>
>     (gdb) p flags
>     $1 = 9
>
> And after:
>
>     (gdb) p flags
>     $1 = (PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_UNKNOWN)

... this is a pleasant surprise---the last time I checked, debuggers
were not clever enough to notice that the distinct values are names for
individual bits.  I can buy this as an argument for using enums for
names for individual bits.  For this to work, obviously, variable
and struct members need to be given the appropriate type.

So I agree with the change in 2/10.

Except that in one place in this step there is a change related to a
different enum that is a true enumeration.  It belongs to 3/10, I
think.  Also, the sales pitch for this step in the proposed commit
log message needs rewriting---this will "not" help to catch cases
where we're not handling cases in switch statements; if you are
selling it because you think it will help debuggers and other
tooling, let's describe it as such.

Even though I think debuggers are overrated ;-)

Thanks.
Junio C Hamano Sept. 29, 2021, 4:02 p.m. UTC | #4
Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> Use the "enum parse_opt_flags" instead of an "int flags" as arguments
> ...

> @@ -861,7 +864,7 @@ int parse_options(int argc, const char **argv, const char *prefix,
>  	case PARSE_OPT_NON_OPTION:
>  	case PARSE_OPT_DONE:
>  		break;
> -	default: /* PARSE_OPT_UNKNOWN */
> +	case PARSE_OPT_UNKNOWN:
>  		if (ctx.argv[0][1] == '-') {
>  			error(_("unknown option `%s'"), ctx.argv[0] + 2);
>  		} else if (isascii(*ctx.opt)) {

This part does not belong to this step or 03/10 (parse_opt_result).
It belongs to 04/10 (drop default from switching on enum).
diff mbox series

Patch

diff --git a/parse-options.c b/parse-options.c
index 55c5821b08d..9dce8b7f8a8 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -481,7 +481,8 @@  static void parse_options_check(const struct option *opts)
 
 static void parse_options_start_1(struct parse_opt_ctx_t *ctx,
 				  int argc, const char **argv, const char *prefix,
-				  const struct option *options, int flags)
+				  const struct option *options,
+				  enum parse_opt_flags flags)
 {
 	ctx->argc = argc;
 	ctx->argv = argv;
@@ -506,7 +507,8 @@  static void parse_options_start_1(struct parse_opt_ctx_t *ctx,
 
 void parse_options_start(struct parse_opt_ctx_t *ctx,
 			 int argc, const char **argv, const char *prefix,
-			 const struct option *options, int flags)
+			 const struct option *options,
+			 enum parse_opt_flags flags)
 {
 	memset(ctx, 0, sizeof(*ctx));
 	parse_options_start_1(ctx, argc, argv, prefix, options, flags);
@@ -838,8 +840,9 @@  int parse_options_end(struct parse_opt_ctx_t *ctx)
 }
 
 int parse_options(int argc, const char **argv, const char *prefix,
-		  const struct option *options, const char * const usagestr[],
-		  int flags)
+		  const struct option *options,
+		  const char * const usagestr[],
+		  enum parse_opt_flags flags)
 {
 	struct parse_opt_ctx_t ctx;
 	struct option *real_options;
@@ -861,7 +864,7 @@  int parse_options(int argc, const char **argv, const char *prefix,
 	case PARSE_OPT_NON_OPTION:
 	case PARSE_OPT_DONE:
 		break;
-	default: /* PARSE_OPT_UNKNOWN */
+	case PARSE_OPT_UNKNOWN:
 		if (ctx.argv[0][1] == '-') {
 			error(_("unknown option `%s'"), ctx.argv[0] + 2);
 		} else if (isascii(*ctx.opt)) {
diff --git a/parse-options.h b/parse-options.h
index 3a3176ae65c..fb5aafd4f7b 100644
--- a/parse-options.h
+++ b/parse-options.h
@@ -213,7 +213,8 @@  struct option {
  */
 int parse_options(int argc, const char **argv, const char *prefix,
 		  const struct option *options,
-		  const char * const usagestr[], int flags);
+		  const char * const usagestr[],
+		  enum parse_opt_flags flags);
 
 NORETURN void usage_with_options(const char * const *usagestr,
 				 const struct option *options);
@@ -270,7 +271,8 @@  struct parse_opt_ctx_t {
 
 void parse_options_start(struct parse_opt_ctx_t *ctx,
 			 int argc, const char **argv, const char *prefix,
-			 const struct option *options, int flags);
+			 const struct option *options,
+			 enum parse_opt_flags flags);
 
 int parse_options_step(struct parse_opt_ctx_t *ctx,
 		       const struct option *options,