diff mbox series

[v3,14/34] fsmonitor--daemon: implement 'start' command

Message ID 9fe902aad87f1192705fb69ea212a2d066d0286d.1625150864.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Builtin FSMonitor Feature | expand

Commit Message

Jeff Hostetler July 1, 2021, 2:47 p.m. UTC
From: Jeff Hostetler <jeffhost@microsoft.com>

Implement 'git fsmonitor--daemon start' command.  This command
tries to start a daemon in the background.  It creates a background
process to run the daemon.

The updated daemon does not actually do anything yet because the
platform backends are still just stubs.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 208 ++++++++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)

Comments

Ævar Arnfjörð Bjarmason July 1, 2021, 10:18 p.m. UTC | #1
On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:

> +#ifdef GIT_WINDOWS_NATIVE
> +/*
> + * Create a background process to run the daemon.  It should be completely
> + * disassociated from the terminal.
> + *
> + * Conceptually like `daemonize()` but different because Windows does not
> + * have `fork(2)`.  Spawn a normal Windows child process but without the
> + * limitations of `start_command()` and `finish_command()`.
> + *
> + * The child process execs the "git fsmonitor--daemon run" command.
> + *
> + * The current process returns so that the caller can wait for the child
> + * to startup before exiting.
> + */
> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
> +{
> +	char git_exe[MAX_PATH];
> +	struct strvec args = STRVEC_INIT;
> +	int in, out;
> +
> +	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
> +
> +	in = open("/dev/null", O_RDONLY);
> +	out = open("/dev/null", O_WRONLY);
> +
> +	strvec_push(&args, git_exe);
> +	strvec_push(&args, "fsmonitor--daemon");
> +	strvec_push(&args, "run");
> +	strvec_pushf(&args, "--ipc-threads=%d", fsmonitor__ipc_threads);
> +
> +	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
> +	close(in);
> +	close(out);
> +
> +	strvec_clear(&args);
> +
> +	if (*pid < 0)
> +		return error(_("could not spawn fsmonitor--daemon in the background"));
> +
> +	return 0;
> +}
> +#else
> +/*
> + * Create a background process to run the daemon.  It should be completely
> + * disassociated from the terminal.
> + *
> + * This is adapted from `daemonize()`.  Use `fork()` to directly
> + * create and run the daemon in the child process.
> + *
> + * The fork-child can just call the run code; it does not need to exec
> + * it.
> + *
> + * The fork-parent returns the child PID so that we can wait for the
> + * child to startup before exiting.
> + */
> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
> +{
> +	*pid = fork();
> +
> +	switch (*pid) {
> +	case 0:
> +		if (setsid() == -1)
> +			error_errno(_("setsid failed"));
> +		close(0);
> +		close(1);
> +		close(2);
> +		sanitize_stdfds();
> +
> +		return !!fsmonitor_run_daemon();
> +
> +	case -1:
> +		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
> +
> +	default:
> +		return 0;
> +	}
> +}
> +#endif

The spawn_background_fsmonitor_daemon() function here is almost the same
as daemonize(). I wonder if this & the Windows-specific one you have
here can't be refactored into an API from what's now in setup.c.

Then we could make builtin/gc.c and daemon.c use that, so Windows could
have background GC, and we'd have a more battle-tested central codepath
for this tricky bit.

It seems to me like the only limitations on it are to have this return
slightly more general things (e.g. not set its own errors, return
structured data), and maybe some callback for what to do in the
child/parent.

> +/*
> + * This is adapted from `wait_or_whine()`.  Watch the child process and
> + * let it get started and begin listening for requests on the socket
> + * before reporting our success.
> + */
> +static int wait_for_background_startup(pid_t pid_child)
> +{
> +	int status;
> +	pid_t pid_seen;
> +	enum ipc_active_state s;
> +	time_t time_limit, now;
> +
> +	time(&time_limit);
> +	time_limit += fsmonitor__start_timeout_sec;
> +
> +	for (;;) {
> +		pid_seen = waitpid(pid_child, &status, WNOHANG);
> +
> +		if (pid_seen == -1)
> +			return error_errno(_("waitpid failed"));
> +		else if (pid_seen == 0) {
> +			/*
> +			 * The child is still running (this should be
> +			 * the normal case).  Try to connect to it on
> +			 * the socket and see if it is ready for
> +			 * business.
> +			 *
> +			 * If there is another daemon already running,
> +			 * our child will fail to start (possibly
> +			 * after a timeout on the lock), but we don't
> +			 * care (who responds) if the socket is live.
> +			 */
> +			s = fsmonitor_ipc__get_state();
> +			if (s == IPC_STATE__LISTENING)
> +				return 0;
> +
> +			time(&now);
> +			if (now > time_limit)
> +				return error(_("fsmonitor--daemon not online yet"));
> +		} else if (pid_seen == pid_child) {
> +			/*
> +			 * The new child daemon process shutdown while
> +			 * it was starting up, so it is not listening
> +			 * on the socket.
> +			 *
> +			 * Try to ping the socket in the odd chance
> +			 * that another daemon started (or was already
> +			 * running) while our child was starting.
> +			 *
> +			 * Again, we don't care who services the socket.
> +			 */
> +			s = fsmonitor_ipc__get_state();
> +			if (s == IPC_STATE__LISTENING)
> +				return 0;
> +
> +			/*
> +			 * We don't care about the WEXITSTATUS() nor
> +			 * any of the WIF*(status) values because
> +			 * `cmd_fsmonitor__daemon()` does the `!!result`
> +			 * trick on all function return values.
> +			 *
> +			 * So it is sufficient to just report the
> +			 * early shutdown as an error.
> +			 */
> +			return error(_("fsmonitor--daemon failed to start"));
> +		} else
> +			return error(_("waitpid is confused"));
> +	}
> +}

Ditto this. could we extend the wait_or_whine() function (or some
extended version thereof) to do what you need with callbacks?

It seems the main difference is just being able to pass down a flag for
waitpid(), and the loop needing to check EINTR or not depending on
whether WNOHANG is passed.

For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
behavior with an adjusted wait_or_whine(). Wouldn't it be better to
report what exit status it exits with e.g. if the top-level process is
signalled? We do so in trace2 for other things we spawn...
Johannes Schindelin July 5, 2021, 9:52 p.m. UTC | #2
Hi Ævar,

On Fri, 2 Jul 2021, Ævar Arnfjörð Bjarmason wrote:

> On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:
>
> > +#ifdef GIT_WINDOWS_NATIVE
> > +/*
> > + * Create a background process to run the daemon.  It should be completely
> > + * disassociated from the terminal.
> > + *
> > + * Conceptually like `daemonize()` but different because Windows does not
> > + * have `fork(2)`.  Spawn a normal Windows child process but without the
> > + * limitations of `start_command()` and `finish_command()`.
> > + *
> > + * The child process execs the "git fsmonitor--daemon run" command.
> > + *
> > + * The current process returns so that the caller can wait for the child
> > + * to startup before exiting.
> > + */
> > +static int spawn_background_fsmonitor_daemon(pid_t *pid)
> > +{
> > +	char git_exe[MAX_PATH];
> > +	struct strvec args = STRVEC_INIT;
> > +	int in, out;
> > +
> > +	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
> > +
> > +	in = open("/dev/null", O_RDONLY);
> > +	out = open("/dev/null", O_WRONLY);
> > +
> > +	strvec_push(&args, git_exe);
> > +	strvec_push(&args, "fsmonitor--daemon");
> > +	strvec_push(&args, "run");
> > +	strvec_pushf(&args, "--ipc-threads=%d", fsmonitor__ipc_threads);
> > +
> > +	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
> > +	close(in);
> > +	close(out);
> > +
> > +	strvec_clear(&args);
> > +
> > +	if (*pid < 0)
> > +		return error(_("could not spawn fsmonitor--daemon in the background"));
> > +
> > +	return 0;
> > +}
> > +#else
> > +/*
> > + * Create a background process to run the daemon.  It should be completely
> > + * disassociated from the terminal.
> > + *
> > + * This is adapted from `daemonize()`.  Use `fork()` to directly
> > + * create and run the daemon in the child process.
> > + *
> > + * The fork-child can just call the run code; it does not need to exec
> > + * it.
> > + *
> > + * The fork-parent returns the child PID so that we can wait for the
> > + * child to startup before exiting.
> > + */
> > +static int spawn_background_fsmonitor_daemon(pid_t *pid)
> > +{
> > +	*pid = fork();
> > +
> > +	switch (*pid) {
> > +	case 0:
> > +		if (setsid() == -1)
> > +			error_errno(_("setsid failed"));
> > +		close(0);
> > +		close(1);
> > +		close(2);
> > +		sanitize_stdfds();
> > +
> > +		return !!fsmonitor_run_daemon();
> > +
> > +	case -1:
> > +		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
> > +
> > +	default:
> > +		return 0;
> > +	}
> > +}
> > +#endif
>
> The spawn_background_fsmonitor_daemon() function here is almost the same
> as daemonize().

Yes, the code comment above that function even says that it was adapted
from `daemonize()`.

And above that, of course, is a _completely different_ implementation that
works on Windows (you will notice that this is in stark contrast of
Windows, where the `daemonize()` function will simply fail with `ENOSYS`).

> I wonder if this & the Windows-specific one you have here can't be
> refactored into an API from what's now in setup.c.

Given that there is no `fork()` on Windows (which has been the subject of
many a message to this mailing list), I think the answer to this question
of yours is a resounding "No".

> Then we could make builtin/gc.c and daemon.c use that, so Windows could
> have background GC, and we'd have a more battle-tested central codepath
> for this tricky bit.

Please. Not _more_ sidetracks.

The issue of getting `git gc --auto` to daemonize on Windows is a rather
complicated one. I won't bore this list with the details, but link to
https://github.com/git-for-windows/git/issues/2221#issuecomment-542589590
(a ~950 word analysis of the problem).

> It seems to me like the only limitations on it are to have this return
> slightly more general things (e.g. not set its own errors, return
> structured data), and maybe some callback for what to do in the
> child/parent.

And one version doesn't `die()`. Nor does it call `exit(0)` in the parent
process. But it calls `fsmonitor_listen()` in the child process. And if
you wanted to refactor `daemonize()` to do all that, it would have to be
renamed (because it does no longer _necessarily_ daemonize), and it would
have to have a `gentle` flag, and it would somehow have to indicate in its
return value whether `0` means that the parent process returned
successfully or the client process. And soon we'll end up with a function
that is both longer and more unreadable than what we have right now.

>
> > +/*
> > + * This is adapted from `wait_or_whine()`.  Watch the child process and
> > + * let it get started and begin listening for requests on the socket
> > + * before reporting our success.
> > + */
> > +static int wait_for_background_startup(pid_t pid_child)
> > +{
> > +	int status;
> > +	pid_t pid_seen;
> > +	enum ipc_active_state s;
> > +	time_t time_limit, now;
> > +
> > +	time(&time_limit);
> > +	time_limit += fsmonitor__start_timeout_sec;
> > +
> > +	for (;;) {
> > +		pid_seen = waitpid(pid_child, &status, WNOHANG);
> > +
> > +		if (pid_seen == -1)
> > +			return error_errno(_("waitpid failed"));
> > +		else if (pid_seen == 0) {
> > +			/*
> > +			 * The child is still running (this should be
> > +			 * the normal case).  Try to connect to it on
> > +			 * the socket and see if it is ready for
> > +			 * business.
> > +			 *
> > +			 * If there is another daemon already running,
> > +			 * our child will fail to start (possibly
> > +			 * after a timeout on the lock), but we don't
> > +			 * care (who responds) if the socket is live.
> > +			 */
> > +			s = fsmonitor_ipc__get_state();
> > +			if (s == IPC_STATE__LISTENING)
> > +				return 0;
> > +
> > +			time(&now);
> > +			if (now > time_limit)
> > +				return error(_("fsmonitor--daemon not online yet"));
> > +		} else if (pid_seen == pid_child) {
> > +			/*
> > +			 * The new child daemon process shutdown while
> > +			 * it was starting up, so it is not listening
> > +			 * on the socket.
> > +			 *
> > +			 * Try to ping the socket in the odd chance
> > +			 * that another daemon started (or was already
> > +			 * running) while our child was starting.
> > +			 *
> > +			 * Again, we don't care who services the socket.
> > +			 */
> > +			s = fsmonitor_ipc__get_state();
> > +			if (s == IPC_STATE__LISTENING)
> > +				return 0;
> > +
> > +			/*
> > +			 * We don't care about the WEXITSTATUS() nor
> > +			 * any of the WIF*(status) values because
> > +			 * `cmd_fsmonitor__daemon()` does the `!!result`
> > +			 * trick on all function return values.
> > +			 *
> > +			 * So it is sufficient to just report the
> > +			 * early shutdown as an error.
> > +			 */
> > +			return error(_("fsmonitor--daemon failed to start"));
> > +		} else
> > +			return error(_("waitpid is confused"));
> > +	}
> > +}
>
> Ditto this. could we extend the wait_or_whine() function (or some
> extended version thereof) to do what you need with callbacks?
>
> It seems the main difference is just being able to pass down a flag for
> waitpid(), and the loop needing to check EINTR or not depending on
> whether WNOHANG is passed.

Given that over half of `wait_or_whine()` is concerned with signals, which
the `wait_for_background_startup()` function is not at all concerned with,
I see another main difference.

> For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
> behavior with an adjusted wait_or_whine(). Wouldn't it be better to
> report what exit status it exits with e.g. if the top-level process is
> signalled? We do so in trace2 for other things we spawn...

The `wait_or_whine()` function also adjusts `atexit()` behavior, which we
would not want here.

Therefore, just like the suggestion about `daemonize()` above, it appears
to me as if the suggested refactoring would make the code dramatically
more complex and less readable.

In other words, it would be a refactoring for refactoring's sake.
Definitely not something I would suggest.

Ciao,
Johannes
Jeff Hostetler July 13, 2021, 2:39 p.m. UTC | #3
My response here is in addition to Dscho's remarks on this topic.
He makes excellent points that I'll just #include here.  I do want
to add my own $0.02 here.

On 7/1/21 6:18 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:
> 
>> +#ifdef GIT_WINDOWS_NATIVE
>> +/*
>> + * Create a background process to run the daemon.  It should be completely
>> + * disassociated from the terminal.
>> + *
>> + * Conceptually like `daemonize()` but different because Windows does not
>> + * have `fork(2)`.  Spawn a normal Windows child process but without the
>> + * limitations of `start_command()` and `finish_command()`.
>> + *
>> + * The child process execs the "git fsmonitor--daemon run" command.
>> + *
>> + * The current process returns so that the caller can wait for the child
>> + * to startup before exiting.
>> + */
>> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
>> +{
>> +	char git_exe[MAX_PATH];
>> +	struct strvec args = STRVEC_INIT;
>> +	int in, out;
>> +
>> +	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
>> +
>> +	in = open("/dev/null", O_RDONLY);
>> +	out = open("/dev/null", O_WRONLY);
>> +
>> +	strvec_push(&args, git_exe);
>> +	strvec_push(&args, "fsmonitor--daemon");
>> +	strvec_push(&args, "run");
>> +	strvec_pushf(&args, "--ipc-threads=%d", fsmonitor__ipc_threads);
>> +
>> +	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
>> +	close(in);
>> +	close(out);
>> +
>> +	strvec_clear(&args);
>> +
>> +	if (*pid < 0)
>> +		return error(_("could not spawn fsmonitor--daemon in the background"));
>> +
>> +	return 0;
>> +}
>> +#else
>> +/*
>> + * Create a background process to run the daemon.  It should be completely
>> + * disassociated from the terminal.
>> + *
>> + * This is adapted from `daemonize()`.  Use `fork()` to directly
>> + * create and run the daemon in the child process.
>> + *
>> + * The fork-child can just call the run code; it does not need to exec
>> + * it.
>> + *
>> + * The fork-parent returns the child PID so that we can wait for the
>> + * child to startup before exiting.
>> + */
>> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
>> +{
>> +	*pid = fork();
>> +
>> +	switch (*pid) {
>> +	case 0:
>> +		if (setsid() == -1)
>> +			error_errno(_("setsid failed"));
>> +		close(0);
>> +		close(1);
>> +		close(2);
>> +		sanitize_stdfds();
>> +
>> +		return !!fsmonitor_run_daemon();
>> +
>> +	case -1:
>> +		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
>> +
>> +	default:
>> +		return 0;
>> +	}
>> +}
>> +#endif
> 
> The spawn_background_fsmonitor_daemon() function here is almost the same
> as daemonize(). I wonder if this & the Windows-specific one you have
> here can't be refactored into an API from what's now in setup.c.
> 
> Then we could make builtin/gc.c and daemon.c use that, so Windows could
> have background GC, and we'd have a more battle-tested central codepath
> for this tricky bit.
> 

I'd rather not refactor all of this and add unnecessary generality
and complexity just to save duplicating some of the code in daemonize().

And I'd rather not destabilize existing commands like gc and daemon
by changing the daemonize() layer on them.  If those commands need help,
let's have a separate conversation _later_ about what help they need
and if it makes sense to combine them.


> It seems to me like the only limitations on it are to have this return
> slightly more general things (e.g. not set its own errors, return
> structured data), and maybe some callback for what to do in the
> child/parent.

There are several issues here when trying to start a background process
and we're already on the edge of the behavioral differences between
Windows and Unix -- let's not make things more confusing with multiple
callbacks, returning structures, custom errors, and etc.

Also, since Windows doesn't do fork(), we don't have child/parent
branches in the call, so this whole "just pretend it's all Unix"
model doesn't work.

Even if we did pretend I'd still need ifdef'd callback routines to
either call `fsmonitor_run_daemon()` or build a command line (or have
blocks of functions that "just happen to never be called on one
platform or the other").


What I have here is an API that the primary (read: parent) calls
and gets back a 0 or -1 (with error message).  And that's it.
The primary can then wait for the child (whether from fork or
CreateProcess) to become responsive or fail to start.  And then
the primary can exit (with or without error).

So I think we're good.  Yes, there is an ifdef here, but I think
it is worth it.


> 
>> +/*
>> + * This is adapted from `wait_or_whine()`.  Watch the child process and
>> + * let it get started and begin listening for requests on the socket
>> + * before reporting our success.
>> + */
>> +static int wait_for_background_startup(pid_t pid_child)
>> +{
>> +	int status;
>> +	pid_t pid_seen;
>> +	enum ipc_active_state s;
>> +	time_t time_limit, now;
>> +
>> +	time(&time_limit);
>> +	time_limit += fsmonitor__start_timeout_sec;
>> +
>> +	for (;;) {
>> +		pid_seen = waitpid(pid_child, &status, WNOHANG);
>> +
>> +		if (pid_seen == -1)
>> +			return error_errno(_("waitpid failed"));
>> +		else if (pid_seen == 0) {
>> +			/*
>> +			 * The child is still running (this should be
>> +			 * the normal case).  Try to connect to it on
>> +			 * the socket and see if it is ready for
>> +			 * business.
>> +			 *
>> +			 * If there is another daemon already running,
>> +			 * our child will fail to start (possibly
>> +			 * after a timeout on the lock), but we don't
>> +			 * care (who responds) if the socket is live.
>> +			 */
>> +			s = fsmonitor_ipc__get_state();
>> +			if (s == IPC_STATE__LISTENING)
>> +				return 0;
>> +
>> +			time(&now);
>> +			if (now > time_limit)
>> +				return error(_("fsmonitor--daemon not online yet"));
>> +		} else if (pid_seen == pid_child) {
>> +			/*
>> +			 * The new child daemon process shutdown while
>> +			 * it was starting up, so it is not listening
>> +			 * on the socket.
>> +			 *
>> +			 * Try to ping the socket in the odd chance
>> +			 * that another daemon started (or was already
>> +			 * running) while our child was starting.
>> +			 *
>> +			 * Again, we don't care who services the socket.
>> +			 */
>> +			s = fsmonitor_ipc__get_state();
>> +			if (s == IPC_STATE__LISTENING)
>> +				return 0;
>> +
>> +			/*
>> +			 * We don't care about the WEXITSTATUS() nor
>> +			 * any of the WIF*(status) values because
>> +			 * `cmd_fsmonitor__daemon()` does the `!!result`
>> +			 * trick on all function return values.
>> +			 *
>> +			 * So it is sufficient to just report the
>> +			 * early shutdown as an error.
>> +			 */
>> +			return error(_("fsmonitor--daemon failed to start"));
>> +		} else
>> +			return error(_("waitpid is confused"));
>> +	}
>> +}
> 
> Ditto this. could we extend the wait_or_whine() function (or some
> extended version thereof) to do what you need with callbacks?
> 
> It seems the main difference is just being able to pass down a flag for
> waitpid(), and the loop needing to check EINTR or not depending on
> whether WNOHANG is passed.
> 
> For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
> behavior with an adjusted wait_or_whine(). Wouldn't it be better to
> report what exit status it exits with e.g. if the top-level process is
> signalled? We do so in trace2 for other things we spawn...
> 

Again, I don't want to mix my usage here with the existing code
and destabilize all existing callers.  Here we are spinning to give
the child a chance to *start* and confirm that it is in a listening
state and ready for connections.  We do not wait for the child to
exit (unless it dies quickly without becoming ready).

We want to end our wait as soon as we confirm that the child is
ready and return.  All I really need from the system is `waitpid()`.

Also, since we started the child in my `spawn_background...()`, it
is not in the `children_to_clean` list, so there is no need to mess
with that.

So I'd like to leave this as is.

Jeff
Ævar Arnfjörð Bjarmason July 13, 2021, 5:54 p.m. UTC | #4
On Tue, Jul 13 2021, Jeff Hostetler wrote:

> My response here is in addition to Dscho's remarks on this topic.
> He makes excellent points that I'll just #include here.  I do want
> to add my own $0.02 here.
>
> On 7/1/21 6:18 PM, Ævar Arnfjörð Bjarmason wrote:
>> On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:
>> 
>>> +#ifdef GIT_WINDOWS_NATIVE
>>> +/*
>>> + * Create a background process to run the daemon.  It should be completely
>>> + * disassociated from the terminal.
>>> + *
>>> + * Conceptually like `daemonize()` but different because Windows does not
>>> + * have `fork(2)`.  Spawn a normal Windows child process but without the
>>> + * limitations of `start_command()` and `finish_command()`.
>>> + *
>>> + * The child process execs the "git fsmonitor--daemon run" command.
>>> + *
>>> + * The current process returns so that the caller can wait for the child
>>> + * to startup before exiting.
>>> + */
>>> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
>>> +{
>>> +	char git_exe[MAX_PATH];
>>> +	struct strvec args = STRVEC_INIT;
>>> +	int in, out;
>>> +
>>> +	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
>>> +
>>> +	in = open("/dev/null", O_RDONLY);
>>> +	out = open("/dev/null", O_WRONLY);
>>> +
>>> +	strvec_push(&args, git_exe);
>>> +	strvec_push(&args, "fsmonitor--daemon");
>>> +	strvec_push(&args, "run");
>>> +	strvec_pushf(&args, "--ipc-threads=%d", fsmonitor__ipc_threads);
>>> +
>>> +	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
>>> +	close(in);
>>> +	close(out);
>>> +
>>> +	strvec_clear(&args);
>>> +
>>> +	if (*pid < 0)
>>> +		return error(_("could not spawn fsmonitor--daemon in the background"));
>>> +
>>> +	return 0;
>>> +}
>>> +#else
>>> +/*
>>> + * Create a background process to run the daemon.  It should be completely
>>> + * disassociated from the terminal.
>>> + *
>>> + * This is adapted from `daemonize()`.  Use `fork()` to directly
>>> + * create and run the daemon in the child process.
>>> + *
>>> + * The fork-child can just call the run code; it does not need to exec
>>> + * it.
>>> + *
>>> + * The fork-parent returns the child PID so that we can wait for the
>>> + * child to startup before exiting.
>>> + */
>>> +static int spawn_background_fsmonitor_daemon(pid_t *pid)
>>> +{
>>> +	*pid = fork();
>>> +
>>> +	switch (*pid) {
>>> +	case 0:
>>> +		if (setsid() == -1)
>>> +			error_errno(_("setsid failed"));
>>> +		close(0);
>>> +		close(1);
>>> +		close(2);
>>> +		sanitize_stdfds();
>>> +
>>> +		return !!fsmonitor_run_daemon();
>>> +
>>> +	case -1:
>>> +		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
>>> +
>>> +	default:
>>> +		return 0;
>>> +	}
>>> +}
>>> +#endif
>> The spawn_background_fsmonitor_daemon() function here is almost the
>> same
>> as daemonize(). I wonder if this & the Windows-specific one you have
>> here can't be refactored into an API from what's now in setup.c.
>> Then we could make builtin/gc.c and daemon.c use that, so Windows
>> could
>> have background GC, and we'd have a more battle-tested central codepath
>> for this tricky bit.
>> 
>
> I'd rather not refactor all of this and add unnecessary generality
> and complexity just to save duplicating some of the code in daemonize().
>
> And I'd rather not destabilize existing commands like gc and daemon
> by changing the daemonize() layer on them.  If those commands need help,
> let's have a separate conversation _later_ about what help they need
> and if it makes sense to combine them.

Johannes suggested in
https://lore.kernel.org/git/nycvar.QRO.7.76.6.2107052336480.8230@tvgsbejvaqbjf.bet/
that (if I understand that correctly, and I just skimmed the linked isse
some days ago), that even if such a refactoring was done these two
functions are solving subtly different problems, or something. I.e. we
couldn't use it for daemonize().

Which I'd say is interesting for the code comments/commit message at
least, i.e. how they're solving subtly different problems (not being
able to run this & not being able to test on Windows I haven't poked at
it myself).

>> It seems to me like the only limitations on it are to have this return
>> slightly more general things (e.g. not set its own errors, return
>> structured data), and maybe some callback for what to do in the
>> child/parent.
>
> There are several issues here when trying to start a background process
> and we're already on the edge of the behavioral differences between
> Windows and Unix -- let's not make things more confusing with multiple
> callbacks, returning structures, custom errors, and etc.
>
> Also, since Windows doesn't do fork(), we don't have child/parent
> branches in the call, so this whole "just pretend it's all Unix"
> model doesn't work.

Fair enough, And I think replied-to above.

> Even if we did pretend I'd still need ifdef'd callback routines to
> either call `fsmonitor_run_daemon()` or build a command line (or have
> blocks of functions that "just happen to never be called on one
> platform or the other").
>
>
> What I have here is an API that the primary (read: parent) calls
> and gets back a 0 or -1 (with error message).  And that's it.
> The primary can then wait for the child (whether from fork or
> CreateProcess) to become responsive or fail to start.  And then
> the primary can exit (with or without error).
>
> So I think we're good.  Yes, there is an ifdef here, but I think
> it is worth it.

FWIW what I was noting here & elsewhere is that yes, you need to ifdef
some of it, but the code you're proposing to add here is using a
different pattern than the one generally preferred in this codebase.

I.e. check out how we do it for threading, we intentionally compile the
"if (thread) {code}" clauses on platforms we know don't have threading,
ditto the code around PCRE in grep.c. 

Similarly, here in e.g. spawn_background_fsmonitor_daemon just the
GetModuleFileNameA() and mingw_spawnvpe() are Windows-specifics (and
could be calls to some helper that *is* ifdef'd).

In this case it's not a big deal, but as a general pattern it helps to
e.g. avoid subtle syntax errors in nested ifdefs and the like, and
generally encourages keeping the ifdef'd code as small as possible.

>>> +/*
>>> + * This is adapted from `wait_or_whine()`.  Watch the child process and
>>> + * let it get started and begin listening for requests on the socket
>>> + * before reporting our success.
>>> + */
>>> +static int wait_for_background_startup(pid_t pid_child)
>>> +{
>>> +	int status;
>>> +	pid_t pid_seen;
>>> +	enum ipc_active_state s;
>>> +	time_t time_limit, now;
>>> +
>>> +	time(&time_limit);
>>> +	time_limit += fsmonitor__start_timeout_sec;
>>> +
>>> +	for (;;) {
>>> +		pid_seen = waitpid(pid_child, &status, WNOHANG);
>>> +
>>> +		if (pid_seen == -1)
>>> +			return error_errno(_("waitpid failed"));
>>> +		else if (pid_seen == 0) {
>>> +			/*
>>> +			 * The child is still running (this should be
>>> +			 * the normal case).  Try to connect to it on
>>> +			 * the socket and see if it is ready for
>>> +			 * business.
>>> +			 *
>>> +			 * If there is another daemon already running,
>>> +			 * our child will fail to start (possibly
>>> +			 * after a timeout on the lock), but we don't
>>> +			 * care (who responds) if the socket is live.
>>> +			 */
>>> +			s = fsmonitor_ipc__get_state();
>>> +			if (s == IPC_STATE__LISTENING)
>>> +				return 0;
>>> +
>>> +			time(&now);
>>> +			if (now > time_limit)
>>> +				return error(_("fsmonitor--daemon not online yet"));
>>> +		} else if (pid_seen == pid_child) {
>>> +			/*
>>> +			 * The new child daemon process shutdown while
>>> +			 * it was starting up, so it is not listening
>>> +			 * on the socket.
>>> +			 *
>>> +			 * Try to ping the socket in the odd chance
>>> +			 * that another daemon started (or was already
>>> +			 * running) while our child was starting.
>>> +			 *
>>> +			 * Again, we don't care who services the socket.
>>> +			 */
>>> +			s = fsmonitor_ipc__get_state();
>>> +			if (s == IPC_STATE__LISTENING)
>>> +				return 0;
>>> +
>>> +			/*
>>> +			 * We don't care about the WEXITSTATUS() nor
>>> +			 * any of the WIF*(status) values because
>>> +			 * `cmd_fsmonitor__daemon()` does the `!!result`
>>> +			 * trick on all function return values.
>>> +			 *
>>> +			 * So it is sufficient to just report the
>>> +			 * early shutdown as an error.
>>> +			 */
>>> +			return error(_("fsmonitor--daemon failed to start"));
>>> +		} else
>>> +			return error(_("waitpid is confused"));
>>> +	}
>>> +}
>> Ditto this. could we extend the wait_or_whine() function (or some
>> extended version thereof) to do what you need with callbacks?
>> It seems the main difference is just being able to pass down a flag
>> for
>> waitpid(), and the loop needing to check EINTR or not depending on
>> whether WNOHANG is passed.
>> For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
>> behavior with an adjusted wait_or_whine(). Wouldn't it be better to
>> report what exit status it exits with e.g. if the top-level process is
>> signalled? We do so in trace2 for other things we spawn...
>> 
>
> Again, I don't want to mix my usage here with the existing code
> and destabilize all existing callers.  Here we are spinning to give
> the child a chance to *start* and confirm that it is in a listening
> state and ready for connections.  We do not wait for the child to
> exit (unless it dies quickly without becoming ready).
>
> We want to end our wait as soon as we confirm that the child is
> ready and return.  All I really need from the system is `waitpid()`.

Will this code behave correctly if the daemon we start is signalled per
the WIFSIGNALED() cases the code this is derived handles, but this does
not?

But sure, I just meant to point out that the flip side to "destabilize
all existing callers" is reviewing new code that may be subtly buggy,
and those subtle bugs (if any) would be smoked out if we were forced to
extend run-command.c, i.e. to use whatever feature(s) this needs for all
existing callers.
Jeff Hostetler July 13, 2021, 6:44 p.m. UTC | #5
On 7/13/21 1:54 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Tue, Jul 13 2021, Jeff Hostetler wrote:
> 
>> My response here is in addition to Dscho's remarks on this topic.
>> He makes excellent points that I'll just #include here.  I do want
>> to add my own $0.02 here.
>>
>> On 7/1/21 6:18 PM, Ævar Arnfjörð Bjarmason wrote:
>>> On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:
>>>

>>>> +/*
>>>> + * This is adapted from `wait_or_whine()`.  Watch the child process and
>>>> + * let it get started and begin listening for requests on the socket
>>>> + * before reporting our success.
>>>> + */
>>>> +static int wait_for_background_startup(pid_t pid_child)
>>>> +{
>>>> +	int status;
>>>> +	pid_t pid_seen;
>>>> +	enum ipc_active_state s;
>>>> +	time_t time_limit, now;
>>>> +
>>>> +	time(&time_limit);
>>>> +	time_limit += fsmonitor__start_timeout_sec;
>>>> +
>>>> +	for (;;) {
>>>> +		pid_seen = waitpid(pid_child, &status, WNOHANG);
>>>> +
>>>> +		if (pid_seen == -1)
>>>> +			return error_errno(_("waitpid failed"));
>>>> +		else if (pid_seen == 0) {
>>>> +			/*
>>>> +			 * The child is still running (this should be
>>>> +			 * the normal case).  Try to connect to it on
>>>> +			 * the socket and see if it is ready for
>>>> +			 * business.
>>>> +			 *
>>>> +			 * If there is another daemon already running,
>>>> +			 * our child will fail to start (possibly
>>>> +			 * after a timeout on the lock), but we don't
>>>> +			 * care (who responds) if the socket is live.
>>>> +			 */
>>>> +			s = fsmonitor_ipc__get_state();
>>>> +			if (s == IPC_STATE__LISTENING)
>>>> +				return 0;
>>>> +
>>>> +			time(&now);
>>>> +			if (now > time_limit)
>>>> +				return error(_("fsmonitor--daemon not online yet"));
>>>> +		} else if (pid_seen == pid_child) {
>>>> +			/*
>>>> +			 * The new child daemon process shutdown while
>>>> +			 * it was starting up, so it is not listening
>>>> +			 * on the socket.
>>>> +			 *
>>>> +			 * Try to ping the socket in the odd chance
>>>> +			 * that another daemon started (or was already
>>>> +			 * running) while our child was starting.
>>>> +			 *
>>>> +			 * Again, we don't care who services the socket.
>>>> +			 */
>>>> +			s = fsmonitor_ipc__get_state();
>>>> +			if (s == IPC_STATE__LISTENING)
>>>> +				return 0;
>>>> +
>>>> +			/*
>>>> +			 * We don't care about the WEXITSTATUS() nor
>>>> +			 * any of the WIF*(status) values because
>>>> +			 * `cmd_fsmonitor__daemon()` does the `!!result`
>>>> +			 * trick on all function return values.
>>>> +			 *
>>>> +			 * So it is sufficient to just report the
>>>> +			 * early shutdown as an error.
>>>> +			 */
>>>> +			return error(_("fsmonitor--daemon failed to start"));
>>>> +		} else
>>>> +			return error(_("waitpid is confused"));
>>>> +	}
>>>> +}
>>> Ditto this. could we extend the wait_or_whine() function (or some
>>> extended version thereof) to do what you need with callbacks?
>>> It seems the main difference is just being able to pass down a flag
>>> for
>>> waitpid(), and the loop needing to check EINTR or not depending on
>>> whether WNOHANG is passed.
>>> For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
>>> behavior with an adjusted wait_or_whine(). Wouldn't it be better to
>>> report what exit status it exits with e.g. if the top-level process is
>>> signalled? We do so in trace2 for other things we spawn...
>>>
>>
>> Again, I don't want to mix my usage here with the existing code
>> and destabilize all existing callers.  Here we are spinning to give
>> the child a chance to *start* and confirm that it is in a listening
>> state and ready for connections.  We do not wait for the child to
>> exit (unless it dies quickly without becoming ready).
>>
>> We want to end our wait as soon as we confirm that the child is
>> ready and return.  All I really need from the system is `waitpid()`.
> 
> Will this code behave correctly if the daemon we start is signalled per
> the WIFSIGNALED() cases the code this is derived handles, but this does
> not?

We're only waiting until the child gets started and is able to receive
requests -- what happens to it after we have confirmed that it is ready
is not our concern (after all, the parent is about to exit anyway and
the child is going to continue on).

If waitpid() gives us a WIFSIGNALED (or any other WIF*() state) before
we have spoken to it, we will return a "failed to start".

But again, that signal would have to arrive immediately after we spawned
it and *before* we could talk to it.  If the child is signaled after we
confirmed it was ready, we don't care because the parent process will be
gone.

(If the child is signaled or is killed (or crashes or whatever), the
next Git command (like "status") that tries to talk to it will re-start
it implicitly -- the `git fsmonitor--daemon start` command here is an
explicit start.)


> 
> But sure, I just meant to point out that the flip side to "destabilize
> all existing callers" is reviewing new code that may be subtly buggy,
> and those subtle bugs (if any) would be smoked out if we were forced to
> extend run-command.c, i.e. to use whatever feature(s) this needs for all
> existing callers.
> 

That would/could have a massive footprint.  And I've already established
that my usage here is sufficiently different from existing uses that the
result would be a mess. IMHO.

Jeff
Ævar Arnfjörð Bjarmason July 20, 2021, 7:38 p.m. UTC | #6
On Tue, Jul 13 2021, Jeff Hostetler wrote:

> On 7/13/21 1:54 PM, Ævar Arnfjörð Bjarmason wrote:
>> On Tue, Jul 13 2021, Jeff Hostetler wrote:
>> 
>>> My response here is in addition to Dscho's remarks on this topic.
>>> He makes excellent points that I'll just #include here.  I do want
>>> to add my own $0.02 here.
>>>
>>> On 7/1/21 6:18 PM, Ævar Arnfjörð Bjarmason wrote:
>>>> On Thu, Jul 01 2021, Jeff Hostetler via GitGitGadget wrote:
>>>>
>
>>>>> +/*
>>>>> + * This is adapted from `wait_or_whine()`.  Watch the child process and
>>>>> + * let it get started and begin listening for requests on the socket
>>>>> + * before reporting our success.
>>>>> + */
>>>>> +static int wait_for_background_startup(pid_t pid_child)
>>>>> +{
>>>>> +	int status;
>>>>> +	pid_t pid_seen;
>>>>> +	enum ipc_active_state s;
>>>>> +	time_t time_limit, now;
>>>>> +
>>>>> +	time(&time_limit);
>>>>> +	time_limit += fsmonitor__start_timeout_sec;
>>>>> +
>>>>> +	for (;;) {
>>>>> +		pid_seen = waitpid(pid_child, &status, WNOHANG);
>>>>> +
>>>>> +		if (pid_seen == -1)
>>>>> +			return error_errno(_("waitpid failed"));
>>>>> +		else if (pid_seen == 0) {
>>>>> +			/*
>>>>> +			 * The child is still running (this should be
>>>>> +			 * the normal case).  Try to connect to it on
>>>>> +			 * the socket and see if it is ready for
>>>>> +			 * business.
>>>>> +			 *
>>>>> +			 * If there is another daemon already running,
>>>>> +			 * our child will fail to start (possibly
>>>>> +			 * after a timeout on the lock), but we don't
>>>>> +			 * care (who responds) if the socket is live.
>>>>> +			 */
>>>>> +			s = fsmonitor_ipc__get_state();
>>>>> +			if (s == IPC_STATE__LISTENING)
>>>>> +				return 0;
>>>>> +
>>>>> +			time(&now);
>>>>> +			if (now > time_limit)
>>>>> +				return error(_("fsmonitor--daemon not online yet"));
>>>>> +		} else if (pid_seen == pid_child) {
>>>>> +			/*
>>>>> +			 * The new child daemon process shutdown while
>>>>> +			 * it was starting up, so it is not listening
>>>>> +			 * on the socket.
>>>>> +			 *
>>>>> +			 * Try to ping the socket in the odd chance
>>>>> +			 * that another daemon started (or was already
>>>>> +			 * running) while our child was starting.
>>>>> +			 *
>>>>> +			 * Again, we don't care who services the socket.
>>>>> +			 */
>>>>> +			s = fsmonitor_ipc__get_state();
>>>>> +			if (s == IPC_STATE__LISTENING)
>>>>> +				return 0;
>>>>> +
>>>>> +			/*
>>>>> +			 * We don't care about the WEXITSTATUS() nor
>>>>> +			 * any of the WIF*(status) values because
>>>>> +			 * `cmd_fsmonitor__daemon()` does the `!!result`
>>>>> +			 * trick on all function return values.
>>>>> +			 *
>>>>> +			 * So it is sufficient to just report the
>>>>> +			 * early shutdown as an error.
>>>>> +			 */
>>>>> +			return error(_("fsmonitor--daemon failed to start"));
>>>>> +		} else
>>>>> +			return error(_("waitpid is confused"));
>>>>> +	}
>>>>> +}
>>>> Ditto this. could we extend the wait_or_whine() function (or some
>>>> extended version thereof) to do what you need with callbacks?
>>>> It seems the main difference is just being able to pass down a flag
>>>> for
>>>> waitpid(), and the loop needing to check EINTR or not depending on
>>>> whether WNOHANG is passed.
>>>> For e.g. the "We don't care about the WEXITSTATUS()" you'd get that
>>>> behavior with an adjusted wait_or_whine(). Wouldn't it be better to
>>>> report what exit status it exits with e.g. if the top-level process is
>>>> signalled? We do so in trace2 for other things we spawn...
>>>>
>>>
>>> Again, I don't want to mix my usage here with the existing code
>>> and destabilize all existing callers.  Here we are spinning to give
>>> the child a chance to *start* and confirm that it is in a listening
>>> state and ready for connections.  We do not wait for the child to
>>> exit (unless it dies quickly without becoming ready).
>>>
>>> We want to end our wait as soon as we confirm that the child is
>>> ready and return.  All I really need from the system is `waitpid()`.
>> Will this code behave correctly if the daemon we start is signalled
>> per
>> the WIFSIGNALED() cases the code this is derived handles, but this does
>> not?
>
> We're only waiting until the child gets started and is able to receive
> requests -- what happens to it after we have confirmed that it is ready
> is not our concern (after all, the parent is about to exit anyway and
> the child is going to continue on).
>
> If waitpid() gives us a WIFSIGNALED (or any other WIF*() state) before
> we have spoken to it, we will return a "failed to start".

So in wait_or_whine() and finish_command() we capture all of that in
trace2 logs, we explicitly don't want that in this case? We do concern
ourselves with the exact exit status/signal status etc. of children we
start in most other scenarios for trace2 logging purposes.

> But again, that signal would have to arrive immediately after we spawned
> it and *before* we could talk to it.  If the child is signaled after we
> confirmed it was ready, we don't care because the parent process will be
> gone.
>
> (If the child is signaled or is killed (or crashes or whatever), the
> next Git command (like "status") that tries to talk to it will re-start
> it implicitly -- the `git fsmonitor--daemon start` command here is an
> explicit start.)
>
>
>> But sure, I just meant to point out that the flip side to
>> "destabilize
>> all existing callers" is reviewing new code that may be subtly buggy,
>> and those subtle bugs (if any) would be smoked out if we were forced to
>> extend run-command.c, i.e. to use whatever feature(s) this needs for all
>> existing callers.
>> 
>
> That would/could have a massive footprint.  And I've already established
> that my usage here is sufficiently different from existing uses that the
> result would be a mess. IMHO.

I hadn't see this before but it seems pretty much exactly the same code
was already added (by you) in 36a7eb68760 (t0052: add simple-ipc tests
and t/helper/test-simple-ipc tool, 2021-03-22), perhaps splitting it
into a utility function for the two to use with a callback mechanism
would reduce the footprint?

What I was suggesting was some continuation of the below.

(I stopped once I noticed the changes I was making to
builtin/fsmonitor--daemon.c didn't even compile (almost the entire file
is hidden behind a macro), but I've commented on that aspect
elsewhere. I.e. it's nice to be able to do tree-wide refactoring without
tripping over code hidden by ifdefs)

It passes all current tests for whatever that's worth, obviously not a
pretty API, and I'm not claiming it's correct.

But I think it's clear how the trace2/error handling part of it could be
further extracted into some utility, so this would just be a mode of
run-command.

Not saying you need to do it, but the comments about us explicitly not
caring at all about the exit state make me wonder if there's some reason
for why someone else would be tripping over some landmine if they did
that refactoring.

Anyway, just a thought. I see from other feedback that you seem to be
getting pretty exasperated with me.

I'm just trying to help this along, usually being able to piggy-back on
existing in-tree code and proving the correctness by passing all in-tree
tests with that piggy-backing helps more than hinders.

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 25f18f2726b..7365fff95f4 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -8,6 +8,7 @@
 #include "simple-ipc.h"
 #include "khash.h"
 #include "pkt-line.h"
+#include "run-command.h"
 
 static const char * const builtin_fsmonitor__daemon_usage[] = {
 	N_("git fsmonitor--daemon start [<options>]"),
@@ -1403,11 +1404,12 @@ static int wait_for_background_startup(pid_t pid_child)
 	time_limit += fsmonitor__start_timeout_sec;
 
 	for (;;) {
+		int saved_errno = 0;
+		code = wait_or_whine_extended(pid_child, &pid_seen, "TODO",
+					      0, WNOHANG, &saved_errno);
 		pid_seen = waitpid(pid_child, &status, WNOHANG);
 
-		if (pid_seen == -1)
-			return error_errno(_("waitpid failed"));
-		else if (pid_seen == 0) {
+		if (pid_seen == 0) {
 			/*
 			 * The child is still running (this should be
 			 * the normal case).  Try to connect to it on
@@ -1452,8 +1454,7 @@ static int wait_for_background_startup(pid_t pid_child)
 			 * early shutdown as an error.
 			 */
 			return error(_("fsmonitor--daemon failed to start"));
-		} else
-			return error(_("waitpid is confused"));
+		}
 	}
 }
 
diff --git a/run-command.c b/run-command.c
index aacc336f951..856e7d87c40 100644
--- a/run-command.c
+++ b/run-command.c
@@ -543,24 +543,28 @@ static inline void set_cloexec(int fd)
 		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 }
 
-static int wait_or_whine(pid_t pid, const char *argv0, int in_signal)
+int wait_or_whine_extended(pid_t pid, pid_t *waiting, const char *argv0,
+			   int in_signal, int waitpid_options, int *failed_errno)
 {
 	int status, code = -1;
-	pid_t waiting;
-	int failed_errno = 0;
 
-	while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
+	if (waitpid_options & WNOHANG)
+		*waiting = waitpid(pid, &status, waitpid_options);
+	else
+		while ((*waiting = waitpid(pid, &status, waitpid_options)) < 0 &&
+		       errno == EINTR)
 		;	/* nothing */
+
 	if (in_signal) {
 		if (WIFEXITED(status))
 			code = WEXITSTATUS(status);
 		return code;
 	}
 
-	if (waiting < 0) {
-		failed_errno = errno;
+	if (*waiting < 0) {
+		*failed_errno = errno;
 		error_errno("waitpid for %s failed", argv0);
-	} else if (waiting != pid) {
+	} else if (*waiting != pid) {
 		error("waitpid is confused (%s)", argv0);
 	} else if (WIFSIGNALED(status)) {
 		code = WTERMSIG(status);
@@ -574,14 +578,23 @@ static int wait_or_whine(pid_t pid, const char *argv0, int in_signal)
 		code += 128;
 	} else if (WIFEXITED(status)) {
 		code = WEXITSTATUS(status);
+	} else if (waitpid_options & WNOHANG && *waiting == 0) {
+		code = 0;
 	} else {
 		error("waitpid is confused (%s)", argv0);
 	}
 
-	clear_child_for_cleanup(pid);
+	return code;
+}
 
+static int wait_or_whine(pid_t pid, const char *argv0, int in_signal)
+{
+	pid_t ignore;
+	int failed_errno = 0;
+	int ret = wait_or_whine_extended(pid, &ignore, argv0, in_signal, 0, &failed_errno);
+	clear_child_for_cleanup(pid);
 	errno = failed_errno;
-	return code;
+	return ret;
 }
 
 static void trace_add_env(struct strbuf *dst, const char *const *deltaenv)
diff --git a/run-command.h b/run-command.h
index e321d23bbd2..fe39730f87a 100644
--- a/run-command.h
+++ b/run-command.h
@@ -182,6 +182,9 @@ void child_process_clear(struct child_process *);
 
 int is_executable(const char *name);
 
+int wait_or_whine_extended(pid_t pid, pid_t *waiting, const char *argv0,
+			   int in_signal, int waitpid_options, int *failed_errno);
+
 /**
  * Start a sub-process. Takes a pointer to a `struct child_process`
  * that specifies the details and returns pipe FDs (if requested).
diff --git a/t/helper/test-simple-ipc.c b/t/helper/test-simple-ipc.c
index 91345180750..44658a46713 100644
--- a/t/helper/test-simple-ipc.c
+++ b/t/helper/test-simple-ipc.c
@@ -9,6 +9,7 @@
 #include "parse-options.h"
 #include "thread-utils.h"
 #include "strvec.h"
+#include "run-command.h"
 
 #ifndef SUPPORTS_SIMPLE_IPC
 int cmd__simple_ipc(int argc, const char **argv)
@@ -349,7 +350,7 @@ static int spawn_server(pid_t *pid)
  */
 static int wait_for_server_startup(pid_t pid_child)
 {
-	int status;
+	int code;
 	pid_t pid_seen;
 	enum ipc_active_state s;
 	time_t time_limit, now;
@@ -358,12 +359,10 @@ static int wait_for_server_startup(pid_t pid_child)
 	time_limit += cl_args.max_wait_sec;
 
 	for (;;) {
-		pid_seen = waitpid(pid_child, &status, WNOHANG);
-
-		if (pid_seen == -1)
-			return error_errno(_("waitpid failed"));
-
-		else if (pid_seen == 0) {
+		int saved_errno = 0;
+		code = wait_or_whine_extended(pid_child, &pid_seen, "TODO",
+					      0, WNOHANG, &saved_errno);
+		if (pid_seen == 0) {
 			/*
 			 * The child is still running (this should be
 			 * the normal case).  Try to connect to it on
@@ -384,9 +383,7 @@ static int wait_for_server_startup(pid_t pid_child)
 				return error(_("daemon not online yet"));
 
 			continue;
-		}
-
-		else if (pid_seen == pid_child) {
+		} else if (pid_seen == pid_child) {
 			/*
 			 * The new child daemon process shutdown while
 			 * it was starting up, so it is not listening
@@ -412,10 +409,9 @@ static int wait_for_server_startup(pid_t pid_child)
 			 * early shutdown as an error.
 			 */
 			return error(_("daemon failed to start"));
+		} else if (code) {
+			BUG("??");
 		}
-
-		else
-			return error(_("waitpid is confused"));
 	}
 }
diff mbox series

Patch

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index a265c962ccc..7fcf960652f 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -9,6 +9,7 @@ 
 #include "khash.h"
 
 static const char * const builtin_fsmonitor__daemon_usage[] = {
+	N_("git fsmonitor--daemon start [<options>]"),
 	N_("git fsmonitor--daemon run [<options>]"),
 	N_("git fsmonitor--daemon stop"),
 	N_("git fsmonitor--daemon status"),
@@ -22,6 +23,9 @@  static const char * const builtin_fsmonitor__daemon_usage[] = {
 #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
 static int fsmonitor__ipc_threads = 8;
 
+#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
+static int fsmonitor__start_timeout_sec = 60;
+
 static int fsmonitor_config(const char *var, const char *value, void *cb)
 {
 	if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
@@ -33,6 +37,15 @@  static int fsmonitor_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
+		int i = git_config_int(var, value);
+		if (i < 0)
+			return error(_("value of '%s' out of range: %d"),
+				     FSMONITOR__START_TIMEOUT, i);
+		fsmonitor__start_timeout_sec = i;
+		return 0;
+	}
+
 	return git_default_config(var, value, cb);
 }
 
@@ -256,6 +269,194 @@  static int try_to_run_foreground_daemon(void)
 	return !!fsmonitor_run_daemon();
 }
 
+#ifdef GIT_WINDOWS_NATIVE
+/*
+ * Create a background process to run the daemon.  It should be completely
+ * disassociated from the terminal.
+ *
+ * Conceptually like `daemonize()` but different because Windows does not
+ * have `fork(2)`.  Spawn a normal Windows child process but without the
+ * limitations of `start_command()` and `finish_command()`.
+ *
+ * The child process execs the "git fsmonitor--daemon run" command.
+ *
+ * The current process returns so that the caller can wait for the child
+ * to startup before exiting.
+ */
+static int spawn_background_fsmonitor_daemon(pid_t *pid)
+{
+	char git_exe[MAX_PATH];
+	struct strvec args = STRVEC_INIT;
+	int in, out;
+
+	GetModuleFileNameA(NULL, git_exe, MAX_PATH);
+
+	in = open("/dev/null", O_RDONLY);
+	out = open("/dev/null", O_WRONLY);
+
+	strvec_push(&args, git_exe);
+	strvec_push(&args, "fsmonitor--daemon");
+	strvec_push(&args, "run");
+	strvec_pushf(&args, "--ipc-threads=%d", fsmonitor__ipc_threads);
+
+	*pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out);
+	close(in);
+	close(out);
+
+	strvec_clear(&args);
+
+	if (*pid < 0)
+		return error(_("could not spawn fsmonitor--daemon in the background"));
+
+	return 0;
+}
+#else
+/*
+ * Create a background process to run the daemon.  It should be completely
+ * disassociated from the terminal.
+ *
+ * This is adapted from `daemonize()`.  Use `fork()` to directly
+ * create and run the daemon in the child process.
+ *
+ * The fork-child can just call the run code; it does not need to exec
+ * it.
+ *
+ * The fork-parent returns the child PID so that we can wait for the
+ * child to startup before exiting.
+ */
+static int spawn_background_fsmonitor_daemon(pid_t *pid)
+{
+	*pid = fork();
+
+	switch (*pid) {
+	case 0:
+		if (setsid() == -1)
+			error_errno(_("setsid failed"));
+		close(0);
+		close(1);
+		close(2);
+		sanitize_stdfds();
+
+		return !!fsmonitor_run_daemon();
+
+	case -1:
+		return error_errno(_("could not spawn fsmonitor--daemon in the background"));
+
+	default:
+		return 0;
+	}
+}
+#endif
+
+/*
+ * This is adapted from `wait_or_whine()`.  Watch the child process and
+ * let it get started and begin listening for requests on the socket
+ * before reporting our success.
+ */
+static int wait_for_background_startup(pid_t pid_child)
+{
+	int status;
+	pid_t pid_seen;
+	enum ipc_active_state s;
+	time_t time_limit, now;
+
+	time(&time_limit);
+	time_limit += fsmonitor__start_timeout_sec;
+
+	for (;;) {
+		pid_seen = waitpid(pid_child, &status, WNOHANG);
+
+		if (pid_seen == -1)
+			return error_errno(_("waitpid failed"));
+		else if (pid_seen == 0) {
+			/*
+			 * The child is still running (this should be
+			 * the normal case).  Try to connect to it on
+			 * the socket and see if it is ready for
+			 * business.
+			 *
+			 * If there is another daemon already running,
+			 * our child will fail to start (possibly
+			 * after a timeout on the lock), but we don't
+			 * care (who responds) if the socket is live.
+			 */
+			s = fsmonitor_ipc__get_state();
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			time(&now);
+			if (now > time_limit)
+				return error(_("fsmonitor--daemon not online yet"));
+		} else if (pid_seen == pid_child) {
+			/*
+			 * The new child daemon process shutdown while
+			 * it was starting up, so it is not listening
+			 * on the socket.
+			 *
+			 * Try to ping the socket in the odd chance
+			 * that another daemon started (or was already
+			 * running) while our child was starting.
+			 *
+			 * Again, we don't care who services the socket.
+			 */
+			s = fsmonitor_ipc__get_state();
+			if (s == IPC_STATE__LISTENING)
+				return 0;
+
+			/*
+			 * We don't care about the WEXITSTATUS() nor
+			 * any of the WIF*(status) values because
+			 * `cmd_fsmonitor__daemon()` does the `!!result`
+			 * trick on all function return values.
+			 *
+			 * So it is sufficient to just report the
+			 * early shutdown as an error.
+			 */
+			return error(_("fsmonitor--daemon failed to start"));
+		} else
+			return error(_("waitpid is confused"));
+	}
+}
+
+static int try_to_start_background_daemon(void)
+{
+	pid_t pid_child;
+	int ret;
+
+	/*
+	 * Before we try to create a background daemon process, see
+	 * if a daemon process is already listening.  This makes it
+	 * easier for us to report an already-listening error to the
+	 * console, since our spawn/daemon can only report the success
+	 * of creating the background process (and not whether it
+	 * immediately exited).
+	 */
+	if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
+		die("fsmonitor--daemon is already running '%s'",
+		    the_repository->worktree);
+
+	printf(_("starting fsmonitor-daemon in '%s'\n"),
+	       the_repository->worktree);
+	fflush(stdout);
+
+	/*
+	 * Run the actual daemon in a background process.
+	 */
+	ret = spawn_background_fsmonitor_daemon(&pid_child);
+	if (pid_child <= 0)
+		return ret;
+
+	/*
+	 * Wait (with timeout) for the background child process get
+	 * started and begin listening on the socket/pipe.  This makes
+	 * the "start" command more synchronous and more reliable in
+	 * tests.
+	 */
+	ret = wait_for_background_startup(pid_child);
+
+	return ret;
+}
+
 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 {
 	const char *subcmd;
@@ -264,6 +465,10 @@  int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 		OPT_INTEGER(0, "ipc-threads",
 			    &fsmonitor__ipc_threads,
 			    N_("use <n> ipc worker threads")),
+		OPT_INTEGER(0, "start-timeout",
+			    &fsmonitor__start_timeout_sec,
+			    N_("Max seconds to wait for background daemon startup")),
+
 		OPT_END()
 	};
 
@@ -285,6 +490,9 @@  int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
 		die(_("invalid 'ipc-threads' value (%d)"),
 		    fsmonitor__ipc_threads);
 
+	if (!strcmp(subcmd, "start"))
+		return !!try_to_start_background_daemon();
+
 	if (!strcmp(subcmd, "run"))
 		return !!try_to_run_foreground_daemon();