diff mbox series

[4/8] builtin/config: collect "value_regexp" data in a struct

Message ID 336eaa77e4974f84ea1eef473672e1d300f3a43d.1573670565.git.martin.agren@gmail.com (mailing list archive)
State New, archived
Headers show
Series builtin/config: canonicalize "value_regex" with `--type=bool[-or-int]` | expand

Commit Message

Martin Ågren Nov. 13, 2019, 6:55 p.m. UTC
`git config` can take an optional "value_regexp". Collect the
`regex_t`-pointer and the `do_not_match` flag into a new `struct
cmd_line_value`.

Rather than signalling/judging presence of a regexp by the NULL-ness of
the pointer, introduce a `mode` enum. After this commit, we just have
two modes, "none" and "regexp", but we will gain another one in the next
commit.

Signed-off-by: Martin Ågren <martin.agren@gmail.com>
---
 builtin/config.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

Comments

Junio C Hamano Nov. 21, 2019, 5:22 a.m. UTC | #1
Martin Ågren <martin.agren@gmail.com> writes:

> `git config` can take an optional "value_regexp". Collect the
> `regex_t`-pointer and the `do_not_match` flag into a new `struct
> cmd_line_value`.

A "struct cmd_line_value" sounded, to me at least during my first
reading, as if it is about all command line options, but that is not
at all what you meant to imply.  Is this only about the optional
value-regexp (if so perhaps calling it "value_regexp_option" would
have helped me avoid such a misunderstanding)?

> Rather than signalling/judging presence of a regexp by the NULL-ness of
> the pointer, introduce a `mode` enum.

OK.  Tangentially this makes readers wonder why the existing code
for key_regexp does not follow the same "NULL-ness" pattern but has
a separate use_key_regexp boolean.  It appears that the original
code is quite confused---it is totally outside the scope of this
series to clean it up and inject sanity into it though ;-)

>  static regex_t *key_regexp;
> -static regex_t *regexp;
> +static struct {
> +	enum { none, regexp } mode;

We often use the same identifier for a struct and an instance of the
struct, taking advantage of the fact that they live in separate
namespaces, but lowercase enumerated values like 'regexp' that
collides with the field name (and possibly a variable name used
elsewhere) smells a bit too much.

> +	regex_t *regexp;
> +	int do_not_match; /* used with `regexp` */
> +} cmd_line_value;
>  static int show_keys;
>  static int omit_values;
>  static int use_key_regexp;

> @@ -283,19 +288,21 @@ static int collect_config(const char *key_, const char *value_, void *cb)
>  static int handle_value_regex(const char *regex_)
>  {
>  	if (!regex_) {
> -		regexp = NULL;
> +		cmd_line_value.mode = none;
>  		return 0;

Now we are back to relying on cmd_line_value.regexp staying to be
NULL after initialized, which is the state before the previous
patch.  If the end result is correct, then it is OK, I think, but
then the previous step shouldn't have added the NULL assignment here
in the first place.

>  	}
>  
> +	cmd_line_value.mode = regexp;
> +
>  	if (regex_[0] == '!') {
> -		do_not_match = 1;
> +		cmd_line_value.do_not_match = 1;
>  		regex_++;
>  	}
>  
> -	regexp = (regex_t*)xmalloc(sizeof(regex_t));
> -	if (regcomp(regexp, regex_, REG_EXTENDED)) {
> +	cmd_line_value.regexp = xmalloc(sizeof(*cmd_line_value.regexp));
> +	if (regcomp(cmd_line_value.regexp, regex_, REG_EXTENDED)) {
>  		error(_("invalid pattern: %s"), regex_);
> -		FREE_AND_NULL(regexp);
> +		FREE_AND_NULL(cmd_line_value.regexp);

Hmph.  !regexp in old code should mean cmd_line_value.mode==regexp
in the new world order after this patch is applied, no?  Should we
be treaking the mode field here before we leave?  I think it should
not matter, but thought it wouldn't hurt to ask.

In collect_config(), cmd_line_value.regexp is blindly passed to
regexec(3) as long as cmd_line_value.mode==regexp, so the invariant
"when .mode is regexp, .regexp must be valid, or collect_config() would
never be called for such cmd_line_value" is rather important to
avoid crashing ;-)

>  		return CONFIG_INVALID_PATTERN;
>  	}
>  
> @@ -372,9 +379,9 @@ static int get_value(const char *key_, const char *regex_)
>  		regfree(key_regexp);
>  		free(key_regexp);
>  	}
> -	if (regexp) {
> -		regfree(regexp);
> -		free(regexp);
> +	if (cmd_line_value.regexp) {
> +		regfree(cmd_line_value.regexp);
> +		free(cmd_line_value.regexp);

Likewise.

>  	}
>  
>  	return ret;
Martin Ågren Nov. 21, 2019, 7:55 p.m. UTC | #2
On Thu, 21 Nov 2019 at 06:22, Junio C Hamano <gitster@pobox.com> wrote:
>
> Martin Ågren <martin.agren@gmail.com> writes:
>
> > `git config` can take an optional "value_regexp". Collect the
> > `regex_t`-pointer and the `do_not_match` flag into a new `struct
> > cmd_line_value`.
>
> A "struct cmd_line_value" sounded, to me at least during my first
> reading, as if it is about all command line options, but that is not
> at all what you meant to imply.  Is this only about the optional
> value-regexp (if so perhaps calling it "value_regexp_option" would
> have helped me avoid such a misunderstanding)?

Yes, that's right. Your suggested name is better. Thanks.

> > Rather than signalling/judging presence of a regexp by the NULL-ness of
> > the pointer, introduce a `mode` enum.
>
> OK.  Tangentially this makes readers wonder why the existing code
> for key_regexp does not follow the same "NULL-ness" pattern but has
> a separate use_key_regexp boolean.  It appears that the original
> code is quite confused---it is totally outside the scope of this
> series to clean it up and inject sanity into it though ;-)

Yeah, I considered doing such a cleanup, but opted to try and stay
focused.

> >  static regex_t *key_regexp;
> > -static regex_t *regexp;
> > +static struct {
> > +     enum { none, regexp } mode;
>
> We often use the same identifier for a struct and an instance of the
> struct, taking advantage of the fact that they live in separate
> namespaces, but lowercase enumerated values like 'regexp' that
> collides with the field name (and possibly a variable name used
> elsewhere) smells a bit too much.

Ok, thanks for sanity-checking.

> > +     regex_t *regexp;
> > +     int do_not_match; /* used with `regexp` */
> > +} cmd_line_value;
> >  static int show_keys;
> >  static int omit_values;
> >  static int use_key_regexp;
>
> > @@ -283,19 +288,21 @@ static int collect_config(const char *key_, const char *value_, void *cb)
> >  static int handle_value_regex(const char *regex_)
> >  {
> >       if (!regex_) {
> > -             regexp = NULL;
> > +             cmd_line_value.mode = none;
> >               return 0;
>
> Now we are back to relying on cmd_line_value.regexp staying to be
> NULL after initialized, which is the state before the previous
> patch.  If the end result is correct, then it is OK, I think, but
> then the previous step shouldn't have added the NULL assignment here
> in the first place.

Ok, noted.

As I wrote in my reply there, that made the whole thing not a 100%
refactoring anyway. I'll drop that one.

> > +     cmd_line_value.mode = regexp;
> > +
> >       if (regex_[0] == '!') {
> > -             do_not_match = 1;
> > +             cmd_line_value.do_not_match = 1;
> >               regex_++;
> >       }
> >
> > -     regexp = (regex_t*)xmalloc(sizeof(regex_t));
> > -     if (regcomp(regexp, regex_, REG_EXTENDED)) {
> > +     cmd_line_value.regexp = xmalloc(sizeof(*cmd_line_value.regexp));
> > +     if (regcomp(cmd_line_value.regexp, regex_, REG_EXTENDED)) {
> >               error(_("invalid pattern: %s"), regex_);
> > -             FREE_AND_NULL(regexp);
> > +             FREE_AND_NULL(cmd_line_value.regexp);
>
> Hmph.  !regexp in old code should mean cmd_line_value.mode==regexp
> in the new world order after this patch is applied, no?  Should we
> be treaking the mode field here before we leave?  I think it should
> not matter, but thought it wouldn't hurt to ask.
>
> In collect_config(), cmd_line_value.regexp is blindly passed to
> regexec(3) as long as cmd_line_value.mode==regexp, so the invariant
> "when .mode is regexp, .regexp must be valid, or collect_config() would
> never be called for such cmd_line_value" is rather important to
> avoid crashing ;-)

Good point. No-one will be looking at the struct when we bail out here,
but we're just one missing "if" away from that changing... Might as well
try to leave things in a sane state to reduce the possibility of this
biting us in the future.

> > @@ -372,9 +379,9 @@ static int get_value(const char *key_, const char *regex_)
> >               regfree(key_regexp);
> >               free(key_regexp);
> >       }
> > -     if (regexp) {
> > -             regfree(regexp);
> > -             free(regexp);
> > +     if (cmd_line_value.regexp) {
> > +             regfree(cmd_line_value.regexp);
> > +             free(cmd_line_value.regexp);
>
> Likewise.

Thanks.


Martin
Junio C Hamano Nov. 22, 2019, 6:30 a.m. UTC | #3
Martin Ågren <martin.agren@gmail.com> writes:

>> > +static struct {
>> > +     enum { none, regexp } mode;
>>
>> We often use the same identifier for a struct and an instance of the
>> struct, taking advantage of the fact that they live in separate
>> namespaces, but lowercase enumerated values like 'regexp' that
>> collides with the field name (and possibly a variable name used
>> elsewhere) smells a bit too much.
>
> Ok, thanks for sanity-checking.
>
>> > +     regex_t *regexp;
>> > +     int do_not_match; /* used with `regexp` */
>> > +} cmd_line_value;

I _might_ want to take this back.  A pattern that uses the "mode" to
switch among the possibilities in a union, i.e.

	struct {
		enum {
			<something>_none,
			<something>_regexp,
			<something>_bool,
			<something>_int,
		} mode;
                union {
			<type-used-when-it-is-regexp> regexp;
			<type-used-when-it-is-bool> bool;
			<type-used-when-it-is-int> int;
		} u;
	};

may not be too bad.  So I do not strongly mind the lowercase.

But I still do mind an overly bland names for identifiers in an
enum, as enum is not quite a type on its own ('regexp' in one enum
may collide with the same identifier in another enum)..

Thanks.
diff mbox series

Patch

diff --git a/builtin/config.c b/builtin/config.c
index e675ae0640..d812e73e2b 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -14,12 +14,15 @@  static const char *const builtin_config_usage[] = {
 
 static char *key;
 static regex_t *key_regexp;
-static regex_t *regexp;
+static struct {
+	enum { none, regexp } mode;
+	regex_t *regexp;
+	int do_not_match; /* used with `regexp` */
+} cmd_line_value;
 static int show_keys;
 static int omit_values;
 static int use_key_regexp;
 static int do_all;
-static int do_not_match;
 static char delim = '=';
 static char key_delim = ' ';
 static char term = '\n';
@@ -270,8 +273,10 @@  static int collect_config(const char *key_, const char *value_, void *cb)
 		return 0;
 	if (use_key_regexp && regexec(key_regexp, key_, 0, NULL, 0))
 		return 0;
-	if (regexp != NULL &&
-	    (do_not_match ^ !!regexec(regexp, (value_?value_:""), 0, NULL, 0)))
+	if (cmd_line_value.mode == regexp &&
+	    (cmd_line_value.do_not_match ^
+	     !!regexec(cmd_line_value.regexp, value_ ? value_ : "",
+		       0, NULL, 0)))
 		return 0;
 
 	ALLOC_GROW(values->items, values->nr + 1, values->alloc);
@@ -283,19 +288,21 @@  static int collect_config(const char *key_, const char *value_, void *cb)
 static int handle_value_regex(const char *regex_)
 {
 	if (!regex_) {
-		regexp = NULL;
+		cmd_line_value.mode = none;
 		return 0;
 	}
 
+	cmd_line_value.mode = regexp;
+
 	if (regex_[0] == '!') {
-		do_not_match = 1;
+		cmd_line_value.do_not_match = 1;
 		regex_++;
 	}
 
-	regexp = (regex_t*)xmalloc(sizeof(regex_t));
-	if (regcomp(regexp, regex_, REG_EXTENDED)) {
+	cmd_line_value.regexp = xmalloc(sizeof(*cmd_line_value.regexp));
+	if (regcomp(cmd_line_value.regexp, regex_, REG_EXTENDED)) {
 		error(_("invalid pattern: %s"), regex_);
-		FREE_AND_NULL(regexp);
+		FREE_AND_NULL(cmd_line_value.regexp);
 		return CONFIG_INVALID_PATTERN;
 	}
 
@@ -372,9 +379,9 @@  static int get_value(const char *key_, const char *regex_)
 		regfree(key_regexp);
 		free(key_regexp);
 	}
-	if (regexp) {
-		regfree(regexp);
-		free(regexp);
+	if (cmd_line_value.regexp) {
+		regfree(cmd_line_value.regexp);
+		free(cmd_line_value.regexp);
 	}
 
 	return ret;