Message ID | 6834e37066e7877646fc7c37aa79704d14381251.1685472133.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | config: remove global state from config iteration | expand |
Hi Glen On 30/05/2023 19:42, Glen Choo via GitGitGadget wrote: > From: Glen Choo <chooglen@google.com> > > ..without actually changing any of its implementations. This commit does > not build - I've split this out for readability, but post-RFC I will > squash this with the rest of the refactor + cocci changes. While this ends up with a huge amount of churn, I think that is probably inevitable if we're going to get rid of global state (I see Ævar suggested adding a separate set of callbacks but I'm not sure how that would work with chaining up to git_default_config() and it would be nice to improve our error messages with filename and line information). I've not been following this thread all that closely but a couple of thoughts crossed by mind. - is it worth making struct key_value_info opaque and provide getters for the fields so we can change the implementation in the future without having to modify every user. We could rename it config_context or something generic like that if we think it might grow in scope in the future. - (probably impractical) could we stuff the key and value into struct key_value_info so config_fn_t becomes fn(const struct key_value_info, void *data) that would get rid of all the UNUSED annotations but would mean even more churn. The advantage is that one could add functions like kvi_bool_or_int(kvi, &is_bool) and get good error messages because all the config parsing functions would all have access to location information. Best Wishes Phillip > Signed-off-by: Glen Choo <chooglen@google.com> > --- > config.c | 8 +- > config.h | 16 +- > .../coccinelle/config_fn_kvi.pending.cocci | 146 ++++++++++++++++++ > 3 files changed, 158 insertions(+), 12 deletions(-) > create mode 100644 contrib/coccinelle/config_fn_kvi.pending.cocci > > diff --git a/config.c b/config.c > index 493f47df8ae..945f4f3b77e 100644 > --- a/config.c > +++ b/config.c > @@ -489,7 +489,7 @@ static int git_config_include(const char *var, const char *value, void *data) > * Pass along all values, including "include" directives; this makes it > * possible to query information on the includes themselves. > */ > - ret = inc->fn(var, value, inc->data); > + ret = inc->fn(var, value, NULL, inc->data); > if (ret < 0) > return ret; > > @@ -671,7 +671,7 @@ static int config_parse_pair(const char *key, const char *value, > if (git_config_parse_key(key, &canonical_name, NULL)) > return -1; > > - ret = (fn(canonical_name, value, data) < 0) ? -1 : 0; > + ret = (fn(canonical_name, value, NULL, data) < 0) ? -1 : 0; > free(canonical_name); > return ret; > } > @@ -959,7 +959,7 @@ static int get_value(struct config_source *cs, config_fn_t fn, void *data, > * accurate line number in error messages. > */ > cs->linenr--; > - ret = fn(name->buf, value, data); > + ret = fn(name->buf, value, NULL, data); > if (ret >= 0) > cs->linenr++; > return ret; > @@ -2303,7 +2303,7 @@ static void configset_iter(struct config_reader *reader, struct config_set *set, > > config_reader_set_kvi(reader, values->items[value_index].util); > > - if (fn(entry->key, values->items[value_index].string, data) < 0) > + if (fn(entry->key, values->items[value_index].string, NULL, data) < 0) > git_die_config_linenr(entry->key, > reader->config_kvi->filename, > reader->config_kvi->linenr); > diff --git a/config.h b/config.h > index 247b572b37b..9d052c52c3c 100644 > --- a/config.h > +++ b/config.h > @@ -111,6 +111,13 @@ struct config_options { > } error_action; > }; > > +struct key_value_info { > + const char *filename; > + int linenr; > + enum config_origin_type origin_type; > + enum config_scope scope; > +}; > + > /** > * A config callback function takes three parameters: > * > @@ -129,7 +136,7 @@ struct config_options { > * A config callback should return 0 for success, or -1 if the variable > * could not be parsed properly. > */ > -typedef int (*config_fn_t)(const char *, const char *, void *); > +typedef int (*config_fn_t)(const char *, const char *, struct key_value_info *, void *); > > int git_default_config(const char *, const char *, void *); > > @@ -667,13 +674,6 @@ int git_config_get_expiry(const char *key, const char **output); > /* parse either "this many days" integer, or "5.days.ago" approxidate */ > int git_config_get_expiry_in_days(const char *key, timestamp_t *, timestamp_t now); > > -struct key_value_info { > - const char *filename; > - int linenr; > - enum config_origin_type origin_type; > - enum config_scope scope; > -}; > - > /** > * First prints the error message specified by the caller in `err` and then > * dies printing the line number and the file name of the highest priority > diff --git a/contrib/coccinelle/config_fn_kvi.pending.cocci b/contrib/coccinelle/config_fn_kvi.pending.cocci > new file mode 100644 > index 00000000000..d4c84599afa > --- /dev/null > +++ b/contrib/coccinelle/config_fn_kvi.pending.cocci > @@ -0,0 +1,146 @@ > +// These are safe to apply to *.c *.h builtin/*.c > + > +@ get_fn @ > +identifier fn, R; > +@@ > +( > +( > +git_config_from_file > +| > +git_config_from_file_with_options > +| > +git_config_from_mem > +| > +git_config_from_blob_oid > +| > +read_early_config > +| > +read_very_early_config > +| > +config_with_options > +| > +git_config > +| > +git_protected_config > +| > +config_from_gitmodules > +) > + (fn, ...) > +| > +repo_config(R, fn, ...) > +) > + > +@ extends get_fn @ > +identifier C1, C2, D; > +@@ > +int fn(const char *C1, const char *C2, > ++ struct key_value_info *kvi, > + void *D); > + > +@ extends get_fn @ > +@@ > +int fn(const char *, const char *, > ++ struct key_value_info *, > + void *); > + > +@ extends get_fn @ > +// Don't change fns that look like callback fns but aren't > +identifier fn2 != tar_filter_config && != git_diff_heuristic_config && > + != git_default_submodule_config && != git_color_config && > + != bundle_list_update && != parse_object_filter_config; > +identifier C1, C2, D1, D2, S; > +attribute name UNUSED; > +@@ > +int fn(const char *C1, const char *C2, > ++ struct key_value_info *kvi, > + void *D1) { > +<+... > +( > +fn2(C1, C2, > ++ kvi, > +D2); > +| > +if(fn2(C1, C2, > ++ kvi, > +D2) < 0) { ... } > +| > +return fn2(C1, C2, > ++ kvi, > +D2); > +| > +S = fn2(C1, C2, > ++ kvi, > +D2); > +) > +...+> > + } > + > +@ extends get_fn@ > +identifier C1, C2, D; > +attribute name UNUSED; > +@@ > +int fn(const char *C1, const char *C2, > ++ struct key_value_info *kvi UNUSED, > + void *D) {...} > + > + > +// The previous rules don't catch all callbacks, especially if they're defined > +// in a separate file from the git_config() call. Fix these manually. > +@@ > +identifier C1, C2, D; > +attribute name UNUSED; > +@@ > +int > +( > +git_ident_config > +| > +urlmatch_collect_fn > +| > +write_one_config > +| > +forbid_remote_url > +| > +credential_config_callback > +) > + (const char *C1, const char *C2, > ++ struct key_value_info *kvi UNUSED, > + void *D) {...} > + > +@@ > +identifier C1, C2, D, D2, S, fn2; > +@@ > +int > +( > +http_options > +| > +git_status_config > +| > +git_commit_config > +| > +git_default_core_config > +| > +grep_config > +) > + (const char *C1, const char *C2, > ++ struct key_value_info *kvi, > + void *D) { > +<+... > +( > +fn2(C1, C2, > ++ kvi, > +D2); > +| > +if(fn2(C1, C2, > ++ kvi, > +D2) < 0) { ... } > +| > +return fn2(C1, C2, > ++ kvi, > +D2); > +| > +S = fn2(C1, C2, > ++ kvi, > +D2); > +) > +...+> > + }
Phillip Wood <phillip.wood123@gmail.com> writes: > - is it worth making struct key_value_info opaque and provide getters > for the fields so we can change the implementation in the future > without having to modify every user. We could rename it > config_context or something generic like that if we think it might > grow in scope in the future. Yes! I planned to do the key_value_info -> config_context conversion when I send the first non-RFC version for this exact reason. > - (probably impractical) could we stuff the key and value into struct > key_value_info so config_fn_t becomes > fn(const struct key_value_info, void *data) > that would get rid of all the UNUSED annotations but would mean even > more churn. Some of my colleagues also suggested this off-list. I think it is impractical for this series because I don't think anyone could reasonably review with all of the added churn. At least its current form, the churn is mostly concentrated in the signatures, but performing ^this change would make the bodies quite churny too. After this series, I think it becomes somewhat feasible with coccinelle. My .cocci files were difficult to craft because we couldn't rely on the signature of config_fn_t alone to tell us if the function is actually used as config_fn_t, but after this series, we can just use the signature since config_fn_t has a struct key_value_info param. > The advantage is that one could add functions like > kvi_bool_or_int(kvi, &is_bool) and get good error messages because > all the config parsing functions would all have access to location > information. Interesting, I hadn't considered this possibility. This seems like a pretty good abstraction to me, though I worry about the feasibility since this is yet again more churn.
On 01/06/2023 17:22, Glen Choo wrote: > Phillip Wood <phillip.wood123@gmail.com> writes: > >> - is it worth making struct key_value_info opaque and provide getters >> for the fields so we can change the implementation in the future >> without having to modify every user. We could rename it >> config_context or something generic like that if we think it might >> grow in scope in the future. > > Yes! I planned to do the key_value_info -> config_context conversion > when I send the first non-RFC version for this exact reason. That's great >> - (probably impractical) could we stuff the key and value into struct >> key_value_info so config_fn_t becomes >> fn(const struct key_value_info, void *data) >> that would get rid of all the UNUSED annotations but would mean even >> more churn. > > Some of my colleagues also suggested this off-list. I think it is > impractical for this series because I don't think anyone could > reasonably review with all of the added churn. At least its current > form, the churn is mostly concentrated in the signatures, but performing > ^this change would make the bodies quite churny too. I agree that keeping the churn to the function signatures makes it bearable. I wonder though if we could make the change by doing -git_default_config(const char *key, const char *value, void *data) +git_default_config(const struct key_value_info *kvi, void *data) { + const char *key = kvi_key(kvi); + const char *value = kvi_value(kvi); + That would add to the diffstat but I think it wouldn't really be any harder to review than just changing the signature as we're not modifying any existing lines in the function body, just adding a couple of variable declarations to the start of the function. If there is an error in either of the variable declarations then the compiler will complain as "key" or "value" will end up not being declared. It would pave the way for gradually changing the function bodies to use "kvi" directly and removing "key" and "value" > After this series, I think it becomes somewhat feasible with coccinelle. > My .cocci files were difficult to craft because we couldn't rely on the > signature of config_fn_t alone to tell us if the function is actually > used as config_fn_t, but after this series, we can just use the > signature since config_fn_t has a struct key_value_info param. That's an interesting possibility, I worry though that two huge changes to the config callbacks might be one too many though. >> The advantage is that one could add functions like >> kvi_bool_or_int(kvi, &is_bool) and get good error messages because >> all the config parsing functions would all have access to location >> information. > > Interesting, I hadn't considered this possibility. This seems like a > pretty good abstraction to me, though I worry about the feasibility > since this is yet again more churn. It could be done gradually though, converting one config callback at a time once the relevant changes have been made to config.c. As an aside, I think we'd also want a couple of helpers for matching keys so we can just write kvi_match_key(kvi, "user.name") or kvi_skip_key_prefix(kvi, "core.", &p) rather than having to extract the key name first. Best Wishes Phillip
Phillip Wood <phillip.wood123@gmail.com> writes: >> Some of my colleagues also suggested this off-list. I think it is >> impractical for this series because I don't think anyone could >> reasonably review with all of the added churn. At least its current >> form, the churn is mostly concentrated in the signatures, but performing >> ^this change would make the bodies quite churny too. > > I agree that keeping the churn to the function signatures makes it > bearable. I wonder though if we could make the change by doing > > -git_default_config(const char *key, const char *value, void *data) > +git_default_config(const struct key_value_info *kvi, void *data) > { > + const char *key = kvi_key(kvi); > + const char *value = kvi_value(kvi); Ah, yes that seems reasonable to review (and most importantly for me, it is also doable with coccinelle once I figure out how :P). I also agree that it's better to do it all in one change than two. > As an aside, I think we'd also want a couple of helpers for matching > keys so we can just write kvi_match_key(kvi, "user.name") or > kvi_skip_key_prefix(kvi, "core.", &p) rather than having to extract the > key name first. Yes, and that would also abstract over implementation details like matching keys using strcasecmp() and not strcmp(). For reasons like this, I think your proposal paves the way for a harder-to-misuse API. I still have some nagging, probably irrational fear that consolidating all of the config_fn_t args is trickier to manage than adding a single key_value_info arg. It definitely *sounds* trickier, but I can't really think of a real downside. Maybe I just have to try it and send the result for others to consider.
On 02/06/2023 17:46, Glen Choo wrote: > Phillip Wood <phillip.wood123@gmail.com> writes: >> As an aside, I think we'd also want a couple of helpers for matching >> keys so we can just write kvi_match_key(kvi, "user.name") or >> kvi_skip_key_prefix(kvi, "core.", &p) rather than having to extract the >> key name first. > > Yes, and that would also abstract over implementation details like > matching keys using strcasecmp() and not strcmp(). For reasons like > this, I think your proposal paves the way for a harder-to-misuse API. > > I still have some nagging, probably irrational fear that consolidating > all of the config_fn_t args is trickier to manage than adding a single > key_value_info arg. It definitely *sounds* trickier, but I can't really > think of a real downside. > > Maybe I just have to try it and send the result for others to consider. That's probably the best way to see if it is an improvement - you can always blame me if it turns out not to be! Hopefully it isn't too much work to add enough api to be able to convert a couple of config_fn_t functions to see how it pans out. Best Wishes Phillip
Phillip Wood <phillip.wood123@gmail.com> writes: > On 02/06/2023 17:46, Glen Choo wrote: >> Phillip Wood <phillip.wood123@gmail.com> writes: >>> As an aside, I think we'd also want a couple of helpers for matching >>> keys so we can just write kvi_match_key(kvi, "user.name") or >>> kvi_skip_key_prefix(kvi, "core.", &p) rather than having to extract the >>> key name first. >> >> Yes, and that would also abstract over implementation details like >> matching keys using strcasecmp() and not strcmp(). For reasons like >> this, I think your proposal paves the way for a harder-to-misuse API. >> >> I still have some nagging, probably irrational fear that consolidating >> all of the config_fn_t args is trickier to manage than adding a single >> key_value_info arg. It definitely *sounds* trickier, but I can't really >> think of a real downside. >> >> Maybe I just have to try it and send the result for others to consider. > > That's probably the best way to see if it is an improvement - you can > always blame me if it turns out not to be! Hopefully it isn't too much > work to add enough api to be able to convert a couple of config_fn_t > functions to see how it pans out. > I was preparing a reroll that incorporated some of the suggestions here, but when I was done, I didn't really feel like it was better (or at the very least, worth the extra churn) compared to the original approach. The 3 versions I was comparing are: 1. Adding a new "struct config_context" arg to config_fn_t that only contains a .kvi member. This is basically the approach in v1 and v2 (adding an extra args) but wrapping it in a "struct config_context" so that we can grow it later. 2. Stuffing "key" and "value" into "struct key_value_info" and removing "key" and "value" from config_fn_t. 3. Creating a "struct config_context" that contains "key", "value" and "struct key_value_info" as separate members, and removing "key" and "value" from config_fn_t. I didn't try using opaque getters - I think that's nice to have, but isn't crucial to this series and introduces a lot of churn. I've uploaded an implementation of option 3. [1]. The result is quite ugly in some places. In particular, config callbacks are used to calling other config callbacks with slightly different args (e.g. massaging the "key" but keeping the "value" the same, see patches 3,5/14 for examples.), so to make this work, I ended up creating a new "config_context", copying over the relevant members, then changing the members that need to be different. I didn't pursue the refactor to its end state (e.g. some of the CLI config machinery in the earlier patches and git_config_int()/ functions that end in die_bad_number() in later patches, take "key", "value", "key_value_info" as separate args instead of a single "config_context" arg) so I haven't reaped all of the benefits, but I thought the range-diff was getting churny enough that I wasn't interested in pursuing the idea further. I toyed with option 2 for a bit. Unsurprisingly, it requires even more copying of members, though in some ways, it's better than option 3 because callers only have to initialize one "struct key_value_info" instead of "struct key_value_info" _and_ "struct config_context". However, it requires reworking the machinery quite a lot because "key_value_info" is currently used _just_ for config source information, and there are several cases where we need to propagate the config source information separately from the key/value (e.g. the configsets store "key" in a hashmap key, "value" in a string_list_item.string, and "key_value_info" in a string_list_item.util). I'm convinced that this is better interface-wise than option 3, but the required effort is high enough that I don't think we should do this unless we see a real need for it. With that in mind, I'm tempted to continue with option 1 for now, but let me know if I'm missing something. [1] https://github.com/git/git/compare/master...chooglen:git:config/no-global-combined-kv-context?expand=1
diff --git a/config.c b/config.c index 493f47df8ae..945f4f3b77e 100644 --- a/config.c +++ b/config.c @@ -489,7 +489,7 @@ static int git_config_include(const char *var, const char *value, void *data) * Pass along all values, including "include" directives; this makes it * possible to query information on the includes themselves. */ - ret = inc->fn(var, value, inc->data); + ret = inc->fn(var, value, NULL, inc->data); if (ret < 0) return ret; @@ -671,7 +671,7 @@ static int config_parse_pair(const char *key, const char *value, if (git_config_parse_key(key, &canonical_name, NULL)) return -1; - ret = (fn(canonical_name, value, data) < 0) ? -1 : 0; + ret = (fn(canonical_name, value, NULL, data) < 0) ? -1 : 0; free(canonical_name); return ret; } @@ -959,7 +959,7 @@ static int get_value(struct config_source *cs, config_fn_t fn, void *data, * accurate line number in error messages. */ cs->linenr--; - ret = fn(name->buf, value, data); + ret = fn(name->buf, value, NULL, data); if (ret >= 0) cs->linenr++; return ret; @@ -2303,7 +2303,7 @@ static void configset_iter(struct config_reader *reader, struct config_set *set, config_reader_set_kvi(reader, values->items[value_index].util); - if (fn(entry->key, values->items[value_index].string, data) < 0) + if (fn(entry->key, values->items[value_index].string, NULL, data) < 0) git_die_config_linenr(entry->key, reader->config_kvi->filename, reader->config_kvi->linenr); diff --git a/config.h b/config.h index 247b572b37b..9d052c52c3c 100644 --- a/config.h +++ b/config.h @@ -111,6 +111,13 @@ struct config_options { } error_action; }; +struct key_value_info { + const char *filename; + int linenr; + enum config_origin_type origin_type; + enum config_scope scope; +}; + /** * A config callback function takes three parameters: * @@ -129,7 +136,7 @@ struct config_options { * A config callback should return 0 for success, or -1 if the variable * could not be parsed properly. */ -typedef int (*config_fn_t)(const char *, const char *, void *); +typedef int (*config_fn_t)(const char *, const char *, struct key_value_info *, void *); int git_default_config(const char *, const char *, void *); @@ -667,13 +674,6 @@ int git_config_get_expiry(const char *key, const char **output); /* parse either "this many days" integer, or "5.days.ago" approxidate */ int git_config_get_expiry_in_days(const char *key, timestamp_t *, timestamp_t now); -struct key_value_info { - const char *filename; - int linenr; - enum config_origin_type origin_type; - enum config_scope scope; -}; - /** * First prints the error message specified by the caller in `err` and then * dies printing the line number and the file name of the highest priority diff --git a/contrib/coccinelle/config_fn_kvi.pending.cocci b/contrib/coccinelle/config_fn_kvi.pending.cocci new file mode 100644 index 00000000000..d4c84599afa --- /dev/null +++ b/contrib/coccinelle/config_fn_kvi.pending.cocci @@ -0,0 +1,146 @@ +// These are safe to apply to *.c *.h builtin/*.c + +@ get_fn @ +identifier fn, R; +@@ +( +( +git_config_from_file +| +git_config_from_file_with_options +| +git_config_from_mem +| +git_config_from_blob_oid +| +read_early_config +| +read_very_early_config +| +config_with_options +| +git_config +| +git_protected_config +| +config_from_gitmodules +) + (fn, ...) +| +repo_config(R, fn, ...) +) + +@ extends get_fn @ +identifier C1, C2, D; +@@ +int fn(const char *C1, const char *C2, ++ struct key_value_info *kvi, + void *D); + +@ extends get_fn @ +@@ +int fn(const char *, const char *, ++ struct key_value_info *, + void *); + +@ extends get_fn @ +// Don't change fns that look like callback fns but aren't +identifier fn2 != tar_filter_config && != git_diff_heuristic_config && + != git_default_submodule_config && != git_color_config && + != bundle_list_update && != parse_object_filter_config; +identifier C1, C2, D1, D2, S; +attribute name UNUSED; +@@ +int fn(const char *C1, const char *C2, ++ struct key_value_info *kvi, + void *D1) { +<+... +( +fn2(C1, C2, ++ kvi, +D2); +| +if(fn2(C1, C2, ++ kvi, +D2) < 0) { ... } +| +return fn2(C1, C2, ++ kvi, +D2); +| +S = fn2(C1, C2, ++ kvi, +D2); +) +...+> + } + +@ extends get_fn@ +identifier C1, C2, D; +attribute name UNUSED; +@@ +int fn(const char *C1, const char *C2, ++ struct key_value_info *kvi UNUSED, + void *D) {...} + + +// The previous rules don't catch all callbacks, especially if they're defined +// in a separate file from the git_config() call. Fix these manually. +@@ +identifier C1, C2, D; +attribute name UNUSED; +@@ +int +( +git_ident_config +| +urlmatch_collect_fn +| +write_one_config +| +forbid_remote_url +| +credential_config_callback +) + (const char *C1, const char *C2, ++ struct key_value_info *kvi UNUSED, + void *D) {...} + +@@ +identifier C1, C2, D, D2, S, fn2; +@@ +int +( +http_options +| +git_status_config +| +git_commit_config +| +git_default_core_config +| +grep_config +) + (const char *C1, const char *C2, ++ struct key_value_info *kvi, + void *D) { +<+... +( +fn2(C1, C2, ++ kvi, +D2); +| +if(fn2(C1, C2, ++ kvi, +D2) < 0) { ... } +| +return fn2(C1, C2, ++ kvi, +D2); +| +S = fn2(C1, C2, ++ kvi, +D2); +) +...+> + }