diff mbox series

[04/23] fsmonitor: introduce `core.useBuiltinFSMonitor` to call the daemon via IPC

Message ID 7082528d8f7c1afa33e1146e3d274e044735f6a1.1617291666.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Builtin FSMonitor Feature | expand

Commit Message

Johannes Schindelin April 1, 2021, 3:40 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

The `core.fsmonitor` setting is supposed to be set to a path pointing
to a script or executable that (via the Hook API) queries an fsmonitor
process such as watchman.

We are about to implement our own fsmonitor backend, and do not want
to spawn hook processes just to query it.  Let's use `Simple IPC` to
directly communicate with the daemon (and start it if necessary),
guarded by the brand-new `core.useBuiltinFSMonitor` toggle.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 config.c        |  5 +++++
 fsmonitor.c     | 20 +++++++++++++++++---
 repo-settings.c |  3 +++
 repository.h    |  2 ++
 4 files changed, 27 insertions(+), 3 deletions(-)

Comments

Derrick Stolee April 26, 2021, 2:56 p.m. UTC | #1
On 4/1/21 11:40 AM, Johannes Schindelin via GitGitGadget wrote:> @@ -2515,6 +2515,11 @@ int git_config_get_max_percent_split_change(void)
>  
>  int repo_config_get_fsmonitor(struct repository *r)
>  {
> +	if (r->settings.use_builtin_fsmonitor > 0) {

Don't forget to run prepare_repo_settings(r) first.

> +		core_fsmonitor = "(built-in daemon)";
> +		return 1;
> +	}
> +

I found this odd, assigning a string to core_fsmonitor that
would definitely cause a problem trying to execute it as a
hook. I wondered the need for it at all, but found that
there are several places in the FS Monitor subsystem that use
core_fsmonitor as if it was a boolean, indicating whether or
not the feature is enabled at all.

A cleaner way to handle this would be to hide the data behind
a helper method, say "fsmonitor_enabled()" that could then
check a value on the repository (or index) and store the hook
value as a separate value that is only used by the hook-based
implementation.

It's probably a good idea to do that cleanup now, before we
find on accident that we missed a gap and start trying to run
this bogus string as a hook invocation.
> -static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
> +static int query_fsmonitor(int version, struct index_state *istate, struct strbuf *query_result)
>  {
> +	struct repository *r = istate->repo ? istate->repo : the_repository;
> +	const char *last_update = istate->fsmonitor_last_update;
>  	struct child_process cp = CHILD_PROCESS_INIT;
>  	int result;
>  
>  	if (!core_fsmonitor)
>  		return -1;

Here is an example of it being used as a boolean.

> +	if (r->settings.use_builtin_fsmonitor > 0) {
> +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
> +		return fsmonitor_ipc__send_query(last_update, query_result);
> +#else
> +		/* Fake a trivial response. */
> +		warning(_("fsmonitor--daemon unavailable; falling back"));
> +		strbuf_add(query_result, "/", 2);
> +		return 0;
> +#endif

This seems like a case where the helper fsmonitor_ipc__is_supported()
could be used instead of compile-time macros.

(I think this is especially true when we consider the future of the
feature on Linux and the possibility of the same compiled code needing
to check run-time properties of the platform for compatibility.)

> --- a/repo-settings.c
> +++ b/repo-settings.c
> @@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
>  		r->settings.core_multi_pack_index = value;
>  	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
>  
> +	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
> +		r->settings.use_builtin_fsmonitor = 1;
> +

Follows the patterns of repo settings. Good.
Ævar Arnfjörð Bjarmason April 27, 2021, 9:20 a.m. UTC | #2
On Mon, Apr 26 2021, Derrick Stolee wrote:

> On 4/1/21 11:40 AM, Johannes Schindelin via GitGitGadget wrote:> @@ -2515,6 +2515,11 @@ int git_config_get_max_percent_split_change(void)
>>  
>>  int repo_config_get_fsmonitor(struct repository *r)
>>  {
>> +	if (r->settings.use_builtin_fsmonitor > 0) {
>
> Don't forget to run prepare_repo_settings(r) first.
>
>> +		core_fsmonitor = "(built-in daemon)";
>> +		return 1;
>> +	}
>> +
>
> I found this odd, assigning a string to core_fsmonitor that
> would definitely cause a problem trying to execute it as a
> hook. I wondered the need for it at all, but found that
> there are several places in the FS Monitor subsystem that use
> core_fsmonitor as if it was a boolean, indicating whether or
> not the feature is enabled at all.
>
> A cleaner way to handle this would be to hide the data behind
> a helper method, say "fsmonitor_enabled()" that could then
> check a value on the repository (or index) and store the hook
> value as a separate value that is only used by the hook-based
> implementation.
>
> It's probably a good idea to do that cleanup now, before we
> find on accident that we missed a gap and start trying to run
> this bogus string as a hook invocation.
>> -static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
>> +static int query_fsmonitor(int version, struct index_state *istate, struct strbuf *query_result)
>>  {
>> +	struct repository *r = istate->repo ? istate->repo : the_repository;
>> +	const char *last_update = istate->fsmonitor_last_update;
>>  	struct child_process cp = CHILD_PROCESS_INIT;
>>  	int result;
>>  
>>  	if (!core_fsmonitor)
>>  		return -1;
>
> Here is an example of it being used as a boolean.
>
>> +	if (r->settings.use_builtin_fsmonitor > 0) {
>> +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
>> +		return fsmonitor_ipc__send_query(last_update, query_result);
>> +#else
>> +		/* Fake a trivial response. */
>> +		warning(_("fsmonitor--daemon unavailable; falling back"));
>> +		strbuf_add(query_result, "/", 2);
>> +		return 0;
>> +#endif
>
> This seems like a case where the helper fsmonitor_ipc__is_supported()
> could be used instead of compile-time macros.
>
> (I think this is especially true when we consider the future of the
> feature on Linux and the possibility of the same compiled code needing
> to check run-time properties of the platform for compatibility.)
>
>> --- a/repo-settings.c
>> +++ b/repo-settings.c
>> @@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
>>  		r->settings.core_multi_pack_index = value;
>>  	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
>>  
>> +	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
>> +		r->settings.use_builtin_fsmonitor = 1;
>> +
>
> Follows the patterns of repo settings. Good.

It follows the pattern, but as an aside the pattern seems bit odd. I see
it dates back to your 7211b9e7534 (repo-settings: consolidate some
config settings, 2019-08-13).

I.e. we memset() the whole thing to -1, then for most things do something like:

    if (!repo_config_get_bool(r, "gc.writecommitgraph", &value))
        r->settings.gc_write_commit_graph = value;
    UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1);

But could do:

    if (repo_config_get_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph))
        r->settings.gc_write_commit_graph = 1;

No? I.e. the repo_config_get_bool() function already returns non-zero if
we don't find it in the config.

I see the UPDATE_DEFAULT_BOOL() macro has also drifted from "set thing
default boolean" to "set any default value".
Derrick Stolee April 27, 2021, 12:42 p.m. UTC | #3
On 4/27/2021 5:20 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Apr 26 2021, Derrick Stolee wrote:
> 
>> On 4/1/21 11:40 AM, Johannes Schindelin via GitGitGadget wrote:> @@ -2515,6 +2515,11 @@ int git_config_get_max_percent_split_change(void)
...
>>> --- a/repo-settings.c
>>> +++ b/repo-settings.c
>>> @@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
>>>  		r->settings.core_multi_pack_index = value;
>>>  	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
>>>  
>>> +	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
>>> +		r->settings.use_builtin_fsmonitor = 1;
>>> +
>>
>> Follows the patterns of repo settings. Good.
> 
> It follows the pattern, but as an aside the pattern seems bit odd. I see
> it dates back to your 7211b9e7534 (repo-settings: consolidate some
> config settings, 2019-08-13).
> 
> I.e. we memset() the whole thing to -1, then for most things do something like:
> 
>     if (!repo_config_get_bool(r, "gc.writecommitgraph", &value))
>         r->settings.gc_write_commit_graph = value;
>     UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1);
> 
> But could do:
> 
>     if (repo_config_get_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph))
>         r->settings.gc_write_commit_graph = 1;
> 
> No? I.e. the repo_config_get_bool() function already returns non-zero if
> we don't find it in the config.

I see how this is fewer lines of code, but it is harder to read the intent
of the implementation. The current layout makes it clear that we set the
value from the config, if it exists, but otherwise we choose a default.

Sometimes, this choice of a default _needs_ to be deferred, for example with
the fetch_negotiation_algorithm setting, which can be set both from the
fetch.negotiationAlgorithm config, but also the feature.experimental config.

However, perhaps it would be better still for these one-off requests to
create a new macro, say USE_CONFIG_OR_DEFAULT_BOOL() that fills a value
from config _or_ sets the given default:

#define USE_CONFIG_OR_DEFAULT_BOOL(r, v, s, d) \
	if (repo_config_get_bool(r, s, &v)) \
		v = d

And then for this example we would write

	USE_CONFIG_OR_DEFAULT_BOOL(r, r->settings.core_commit_graph,
				   "core.commitgraph", 1);

This would work for multiple config options in this file.

> I see the UPDATE_DEFAULT_BOOL() macro has also drifted from "set thing
> default boolean" to "set any default value".
 
This is correct. I suppose it would be a good change to make some time.
Such a rename could be combined with the refactor above.

I would recommend waiting until such a change isn't conflicting with
ongoing topics, such as this one.

Thanks,
-Stolee
Ævar Arnfjörð Bjarmason April 28, 2021, 7:59 a.m. UTC | #4
On Tue, Apr 27 2021, Derrick Stolee wrote:

> On 4/27/2021 5:20 AM, Ævar Arnfjörð Bjarmason wrote:
>> 
>> On Mon, Apr 26 2021, Derrick Stolee wrote:
>> 
>>> On 4/1/21 11:40 AM, Johannes Schindelin via GitGitGadget wrote:> @@ -2515,6 +2515,11 @@ int git_config_get_max_percent_split_change(void)
> ...
>>>> --- a/repo-settings.c
>>>> +++ b/repo-settings.c
>>>> @@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
>>>>  		r->settings.core_multi_pack_index = value;
>>>>  	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
>>>>  
>>>> +	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
>>>> +		r->settings.use_builtin_fsmonitor = 1;
>>>> +
>>>
>>> Follows the patterns of repo settings. Good.
>> 
>> It follows the pattern, but as an aside the pattern seems bit odd. I see
>> it dates back to your 7211b9e7534 (repo-settings: consolidate some
>> config settings, 2019-08-13).
>> 
>> I.e. we memset() the whole thing to -1, then for most things do something like:
>> 
>>     if (!repo_config_get_bool(r, "gc.writecommitgraph", &value))
>>         r->settings.gc_write_commit_graph = value;
>>     UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1);
>> 
>> But could do:
>> 
>>     if (repo_config_get_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph))
>>         r->settings.gc_write_commit_graph = 1;
>> 
>> No? I.e. the repo_config_get_bool() function already returns non-zero if
>> we don't find it in the config.
>
> I see how this is fewer lines of code, but it is harder to read the intent
> of the implementation. The current [...]

That's exactly the reason I find the existing version unreadable, i.e.:

> layout makes it clear that we set the value from the config, if it
> exists, but otherwise we choose a default.

The repo_config_get_*() functions only return non-zero if the value
doesn't exist, so the pattern of:

    if (repo_config_get(..., "some.key", &value))
        value = 123;

Is idiomatic for "use 123 if some.key doesn't exist in config".

Maybe I'm missing something and that isn't true, but it seems like a
case of going out of one's way to use what the return value is going to
give you.

> Sometimes, this choice of a default _needs_ to be deferred, for example with
> the fetch_negotiation_algorithm setting, which can be set both from the
> fetch.negotiationAlgorithm config, but also the feature.experimental config.

Don't FETCH_NEGOTIATION_UNSET and UNTRACKED_CACHE_UNSET only exist as
action-at-a-distance interaction with the memset to -1 that this
function does?

I.e. it's somewhat complex state management, first we set it to
"uninit", then later act on fetch.negotiationalgorithm, and then on
feature.experimental, and then set a default only if we didn't do any of
the previous things.;

I.e. something like:

    x = -1;
    if (fetch.negotiationalgorithm is set)
    if (x != -1 && feature.experimental is set)
    if (x != -1) x = default
    settings->x = x;

As opposed to a more (to me at least) simpler:

    int x;
    if (fetch.negotiationalgorithm is set)
    else if (feature.experimental is set)
    else x = default
    settings->x = x;

> However, perhaps it would be better still for these one-off requests to
> create a new macro, say USE_CONFIG_OR_DEFAULT_BOOL() that fills a value
> from config _or_ sets the given default:
>
> #define USE_CONFIG_OR_DEFAULT_BOOL(r, v, s, d) \
> 	if (repo_config_get_bool(r, s, &v)) \
> 		v = d
>
> And then for this example we would write
>
> 	USE_CONFIG_OR_DEFAULT_BOOL(r, r->settings.core_commit_graph,
> 				   "core.commitgraph", 1);
>
> This would work for multiple config options in this file.

I came up with this:

+static void repo_env_config_bool_or_default(struct repository *r, const char *env,
+					    const char *key, int *dest, int def)
+{
+	if (env) {
+		int val = git_env_bool(env, -1);
+		if (val != -1) {
+			*dest = val;
+			return;
+		}
+	}
+	if (repo_config_get_bool(r, key, dest))
+		*dest = def;
+}

Used as e.g.:

+	repo_env_config_bool_or_default(r, NULL, "pack.usesparse",
+					&r->settings.pack_use_sparse, 1);
+	repo_env_config_bool_or_default(r, GIT_TEST_MULTI_PACK_INDEX, "core.multipackindex",
+					&r->settings.core_multi_pack_index, 1);

It works for most things there.

Using that sort of pattern also fixes e.g. a bug in your 18e449f86b7
(midx: enable core.multiPackIndex by default, 2020-09-25), where we'll
ignore a false-but-existing env config value over a true config key.

>> I see the UPDATE_DEFAULT_BOOL() macro has also drifted from "set thing
>> default boolean" to "set any default value".
>  
> This is correct. I suppose it would be a good change to make some time.
> Such a rename could be combined with the refactor above.
>
> I would recommend waiting until such a change isn't conflicting with
> ongoing topics, such as this one.

I'm not planning to work on it, but thought I'd ask/prod the original
author if they were interested :)
Jeff Hostetler April 30, 2021, 2:23 p.m. UTC | #5
On 4/26/21 10:56 AM, Derrick Stolee wrote:
> On 4/1/21 11:40 AM, Johannes Schindelin via GitGitGadget wrote:> @@ -2515,6 +2515,11 @@ int git_config_get_max_percent_split_change(void)
>>   
>>   int repo_config_get_fsmonitor(struct repository *r)
>>   {
>> +	if (r->settings.use_builtin_fsmonitor > 0) {
> 
> Don't forget to run prepare_repo_settings(r) first.
> 
>> +		core_fsmonitor = "(built-in daemon)";
>> +		return 1;
>> +	}
>> +
> 
> I found this odd, assigning a string to core_fsmonitor that
> would definitely cause a problem trying to execute it as a
> hook. I wondered the need for it at all, but found that
> there are several places in the FS Monitor subsystem that use
> core_fsmonitor as if it was a boolean, indicating whether or
> not the feature is enabled at all.
> 
> A cleaner way to handle this would be to hide the data behind
> a helper method, say "fsmonitor_enabled()" that could then
> check a value on the repository (or index) and store the hook
> value as a separate value that is only used by the hook-based
> implementation.
> 
> It's probably a good idea to do that cleanup now, before we
> find on accident that we missed a gap and start trying to run
> this bogus string as a hook invocation.

Good point.  In an earlier draft we were using that known
string as a bogus hook path to indicate that we should
call the IPC routines rather than the hook API.  But then
we added the `core.useBuiltinFSMonitor` boolean and had it
override all of the existing fsmonitor config settings.
So we don't technically need it to have a value now and can
and should stop using the pointer as a boolean.

Thanks!

>> -static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
>> +static int query_fsmonitor(int version, struct index_state *istate, struct strbuf *query_result)
>>   {
>> +	struct repository *r = istate->repo ? istate->repo : the_repository;
>> +	const char *last_update = istate->fsmonitor_last_update;
>>   	struct child_process cp = CHILD_PROCESS_INIT;
>>   	int result;
>>   
>>   	if (!core_fsmonitor)
>>   		return -1;
> 
> Here is an example of it being used as a boolean.
> 
>> +	if (r->settings.use_builtin_fsmonitor > 0) {
>> +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
>> +		return fsmonitor_ipc__send_query(last_update, query_result);
>> +#else
>> +		/* Fake a trivial response. */
>> +		warning(_("fsmonitor--daemon unavailable; falling back"));
>> +		strbuf_add(query_result, "/", 2);
>> +		return 0;
>> +#endif
> 
> This seems like a case where the helper fsmonitor_ipc__is_supported()
> could be used instead of compile-time macros.
> 
> (I think this is especially true when we consider the future of the
> feature on Linux and the possibility of the same compiled code needing
> to check run-time properties of the platform for compatibility.)

Yes.

>> --- a/repo-settings.c
>> +++ b/repo-settings.c
>> @@ -58,6 +58,9 @@ void prepare_repo_settings(struct repository *r)
>>   		r->settings.core_multi_pack_index = value;
>>   	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
>>   
>> +	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
>> +		r->settings.use_builtin_fsmonitor = 1;
>> +
> 
> Follows the patterns of repo settings. Good.
> 

I'm going to ignore all of the thread responses to this patch
dealing with how we acquire config settings and macros and etc.
Those issues are completely independent of FSMonitor (which is
already way too big).

Jeff
diff mbox series

Patch

diff --git a/config.c b/config.c
index 955ff4f9461d..31f2cbaf6dfb 100644
--- a/config.c
+++ b/config.c
@@ -2515,6 +2515,11 @@  int git_config_get_max_percent_split_change(void)
 
 int repo_config_get_fsmonitor(struct repository *r)
 {
+	if (r->settings.use_builtin_fsmonitor > 0) {
+		core_fsmonitor = "(built-in daemon)";
+		return 1;
+	}
+
 	if (repo_config_get_pathname(r, "core.fsmonitor", &core_fsmonitor))
 		core_fsmonitor = getenv("GIT_TEST_FSMONITOR");
 
diff --git a/fsmonitor.c b/fsmonitor.c
index 9c9b2abc9414..d7e18fc8cd47 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -3,6 +3,7 @@ 
 #include "dir.h"
 #include "ewah/ewok.h"
 #include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
 #include "run-command.h"
 #include "strbuf.h"
 
@@ -148,14 +149,27 @@  void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
 /*
  * Call the query-fsmonitor hook passing the last update token of the saved results.
  */
-static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
+static int query_fsmonitor(int version, struct index_state *istate, struct strbuf *query_result)
 {
+	struct repository *r = istate->repo ? istate->repo : the_repository;
+	const char *last_update = istate->fsmonitor_last_update;
 	struct child_process cp = CHILD_PROCESS_INIT;
 	int result;
 
 	if (!core_fsmonitor)
 		return -1;
 
+	if (r->settings.use_builtin_fsmonitor > 0) {
+#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
+		return fsmonitor_ipc__send_query(last_update, query_result);
+#else
+		/* Fake a trivial response. */
+		warning(_("fsmonitor--daemon unavailable; falling back"));
+		strbuf_add(query_result, "/", 2);
+		return 0;
+#endif
+	}
+
 	strvec_push(&cp.args, core_fsmonitor);
 	strvec_pushf(&cp.args, "%d", version);
 	strvec_pushf(&cp.args, "%s", last_update);
@@ -263,7 +277,7 @@  void refresh_fsmonitor(struct index_state *istate)
 	if (istate->fsmonitor_last_update) {
 		if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
 			query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2,
-				istate->fsmonitor_last_update, &query_result);
+				istate, &query_result);
 
 			if (query_success) {
 				if (hook_version < 0)
@@ -293,7 +307,7 @@  void refresh_fsmonitor(struct index_state *istate)
 
 		if (hook_version == HOOK_INTERFACE_VERSION1) {
 			query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1,
-				istate->fsmonitor_last_update, &query_result);
+				istate, &query_result);
 		}
 
 		trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor);
diff --git a/repo-settings.c b/repo-settings.c
index f7fff0f5ab83..93aab92ff164 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -58,6 +58,9 @@  void prepare_repo_settings(struct repository *r)
 		r->settings.core_multi_pack_index = value;
 	UPDATE_DEFAULT_BOOL(r->settings.core_multi_pack_index, 1);
 
+	if (!repo_config_get_bool(r, "core.usebuiltinfsmonitor", &value) && value)
+		r->settings.use_builtin_fsmonitor = 1;
+
 	if (!repo_config_get_bool(r, "feature.manyfiles", &value) && value) {
 		UPDATE_DEFAULT_BOOL(r->settings.index_version, 4);
 		UPDATE_DEFAULT_BOOL(r->settings.core_untracked_cache, UNTRACKED_CACHE_WRITE);
diff --git a/repository.h b/repository.h
index b385ca3c94b6..7eeab871ac3e 100644
--- a/repository.h
+++ b/repository.h
@@ -41,6 +41,8 @@  struct repo_settings {
 	enum fetch_negotiation_setting fetch_negotiation_algorithm;
 
 	int core_multi_pack_index;
+
+	int use_builtin_fsmonitor;
 };
 
 struct repository {