diff mbox series

[10/23] fsmonitor--daemon: add pathname classification

Message ID 451563314d84f9d6dee29a4899b5d18033aa227d.1617291666.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Builtin FSMonitor Feature | expand

Commit Message

Jeff Hostetler April 1, 2021, 3:40 p.m. UTC
From: Jeff Hostetler <jeffhost@microsoft.com>

Teach fsmonitor--daemon to classify relative and absolute
pathnames and decide how they should be handled.  This will
be used by the platform-specific backend to respond to each
filesystem event.

When we register for filesystem notifications on a directory,
we get events for everything (recursively) in the directory.
We want to report to clients changes to tracked and untracked
paths within the working directory.  We do not want to report
changes within the .git directory, for example.

This classification will be used in a later commit by the
different backends to classify paths as events are received.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 builtin/fsmonitor--daemon.c | 81 +++++++++++++++++++++++++++++++++++++
 fsmonitor--daemon.h         | 61 ++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)

Comments

Derrick Stolee April 26, 2021, 7:17 p.m. UTC | #1
On 4/1/2021 11:40 AM, Jeff Hostetler via GitGitGadget wrote:
> From: Jeff Hostetler <jeffhost@microsoft.com>
...
> +#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-"
> +
> +enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
> +	const char *rel)
> +{
> +	if (fspathncmp(rel, ".git", 4))
> +		return IS_WORKDIR_PATH;
> +	rel += 4;
> +
> +	if (!*rel)
> +		return IS_DOT_GIT;
> +	if (*rel != '/')
> +		return IS_WORKDIR_PATH; /* e.g. .gitignore */
> +	rel++;
> +
> +	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
> +			strlen(FSMONITOR_COOKIE_PREFIX)))

Seems like this strlen() could be abstracted out. Is it
something the compiler can compute and set for us? Or,
should we create a macro for this constant?

> +		return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
> +
> +	return IS_INSIDE_DOT_GIT;
> +}

Here is the reasoning I was missing for why we watch the .git
directory.

> +enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
> +	const char *rel)
> +{
> +	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
> +			strlen(FSMONITOR_COOKIE_PREFIX)))
> +		return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
> +
> +	return IS_INSIDE_GITDIR;
> +}

And I was about to ask "what happens if we are watching the .git
directory of a worktree?" but here we have a different classifier.

> +static enum fsmonitor_path_type try_classify_workdir_abs_path(
> +	struct fsmonitor_daemon_state *state,
> +	const char *path)
> +{
> +	const char *rel;
> +
> +	if (fspathncmp(path, state->path_worktree_watch.buf,
> +		       state->path_worktree_watch.len))
> +		return IS_OUTSIDE_CONE;
> +
> +	rel = path + state->path_worktree_watch.len;
> +
> +	if (!*rel)
> +		return IS_WORKDIR_PATH; /* it is the root dir exactly */
> +	if (*rel != '/')
> +		return IS_OUTSIDE_CONE;
> +	rel++;
> +
> +	return fsmonitor_classify_path_workdir_relative(rel);
> +}
> +
> +enum fsmonitor_path_type fsmonitor_classify_path_absolute(
> +	struct fsmonitor_daemon_state *state,
> +	const char *path)
> +{
> +	const char *rel;
> +	enum fsmonitor_path_type t;
> +
> +	t = try_classify_workdir_abs_path(state, path);
> +	if (state->nr_paths_watching == 1)
> +		return t;
> +	if (t != IS_OUTSIDE_CONE)
> +		return t;
> +
> +	if (fspathncmp(path, state->path_gitdir_watch.buf,
> +		       state->path_gitdir_watch.len))
> +		return IS_OUTSIDE_CONE;
> +
> +	rel = path + state->path_gitdir_watch.len;
> +
> +	if (!*rel)
> +		return IS_GITDIR; /* it is the <gitdir> exactly */
> +	if (*rel != '/')
> +		return IS_OUTSIDE_CONE;
> +	rel++;
> +
> +	return fsmonitor_classify_path_gitdir_relative(rel);
> +}

And here is where you differentiate the event across the two
cases. OK.

> +/*
> + * Pathname classifications.
> + *
> + * The daemon classifies the pathnames that it receives from file
> + * system notification events into the following categories and uses
> + * that to decide whether clients are told about them.  (And to watch
> + * for file system synchronization events.)
> + *
> + * The client should only care about paths within the working
> + * directory proper (inside the working directory and not ".git" nor
> + * inside of ".git/").  That is, the client has read the index and is
> + * asking for a list of any paths in the working directory that have
> + * been modified since the last token.  The client does not care about
> + * file system changes within the .git directory (such as new loose
> + * objects or packfiles).  So the client will only receive paths that
> + * are classified as IS_WORKDIR_PATH.
> + *
> + * The daemon uses the IS_DOT_GIT and IS_GITDIR internally to mean the
> + * exact ".git" directory or GITDIR.  If the daemon receives a delete
> + * event for either of these directories, it will automatically
> + * shutdown, for example.
> + *
> + * Note that the daemon DOES NOT explicitly watch nor special case the
> + * ".git/index" file.  The daemon does not read the index and does not
> + * have any internal index-relative state.  The daemon only collects
> + * the set of modified paths within the working directory.
> + */
> +enum fsmonitor_path_type {
> +	IS_WORKDIR_PATH = 0,
> +
> +	IS_DOT_GIT,
> +	IS_INSIDE_DOT_GIT,
> +	IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX,
> +
> +	IS_GITDIR,
> +	IS_INSIDE_GITDIR,
> +	IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX,
> +
> +	IS_OUTSIDE_CONE,
> +};
> +
> +/*
> + * Classify a pathname relative to the root of the working directory.
> + */
> +enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
> +	const char *relative_path);
> +
> +/*
> + * Classify a pathname relative to a <gitdir> that is external to the
> + * worktree directory.
> + */
> +enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
> +	const char *relative_path);
> +
> +/*
> + * Classify an absolute pathname received from a filesystem event.
> + */
> +enum fsmonitor_path_type fsmonitor_classify_path_absolute(
> +	struct fsmonitor_daemon_state *state,
> +	const char *path);
> +
>  #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
>  #endif /* FSMONITOR_DAEMON_H */

Had I looked ahead and read these comments beforehand, then I would
have had an easier time determining the intended behavior from the
implementations. Oops.

Thanks,
-Stolee
Eric Sunshine April 26, 2021, 8:11 p.m. UTC | #2
On Mon, Apr 26, 2021 at 3:17 PM Derrick Stolee <stolee@gmail.com> wrote:
> On 4/1/2021 11:40 AM, Jeff Hostetler via GitGitGadget wrote:
> > +#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-"
> > +
> > +     if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
> > +                     strlen(FSMONITOR_COOKIE_PREFIX)))
>
> Seems like this strlen() could be abstracted out. Is it
> something the compiler can compute and set for us? Or,
> should we create a macro for this constant?

If you're asking whether the compiler will resolve strlen("literal
string") to an integer constant at compile time rather than computing
the length at runtime, then the answer is that on this project we
presume that the compiler is smart enough to do that.

Or are you asking for a function something like this?

    fspathhasprefix(rel, FSMONITOR_COOKIE_PREFIX)
Derrick Stolee April 26, 2021, 8:24 p.m. UTC | #3
On 4/26/2021 4:11 PM, Eric Sunshine wrote:
> On Mon, Apr 26, 2021 at 3:17 PM Derrick Stolee <stolee@gmail.com> wrote:
>> On 4/1/2021 11:40 AM, Jeff Hostetler via GitGitGadget wrote:
>>> +#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-"
>>> +
>>> +     if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
>>> +                     strlen(FSMONITOR_COOKIE_PREFIX)))
>>
>> Seems like this strlen() could be abstracted out. Is it
>> something the compiler can compute and set for us? Or,
>> should we create a macro for this constant?
> 
> If you're asking whether the compiler will resolve strlen("literal
> string") to an integer constant at compile time rather than computing
> the length at runtime, then the answer is that on this project we
> presume that the compiler is smart enough to do that.

That is what I was asking.

> Or are you asking for a function something like this?
> 
>     fspathhasprefix(rel, FSMONITOR_COOKIE_PREFIX)

The "fix" I would recommend otherwise would be

	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
			FSMONITOR_COOKIE_PREFIX_LEN))

which is much uglier. I'm glad we can trust the compiler to
be smart enough.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index 23a063707972..16252487240a 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -168,6 +168,87 @@  static int handle_client(void *data, const char *command,
 	return result;
 }
 
+#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-"
+
+enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
+	const char *rel)
+{
+	if (fspathncmp(rel, ".git", 4))
+		return IS_WORKDIR_PATH;
+	rel += 4;
+
+	if (!*rel)
+		return IS_DOT_GIT;
+	if (*rel != '/')
+		return IS_WORKDIR_PATH; /* e.g. .gitignore */
+	rel++;
+
+	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
+			strlen(FSMONITOR_COOKIE_PREFIX)))
+		return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
+
+	return IS_INSIDE_DOT_GIT;
+}
+
+enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
+	const char *rel)
+{
+	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
+			strlen(FSMONITOR_COOKIE_PREFIX)))
+		return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
+
+	return IS_INSIDE_GITDIR;
+}
+
+static enum fsmonitor_path_type try_classify_workdir_abs_path(
+	struct fsmonitor_daemon_state *state,
+	const char *path)
+{
+	const char *rel;
+
+	if (fspathncmp(path, state->path_worktree_watch.buf,
+		       state->path_worktree_watch.len))
+		return IS_OUTSIDE_CONE;
+
+	rel = path + state->path_worktree_watch.len;
+
+	if (!*rel)
+		return IS_WORKDIR_PATH; /* it is the root dir exactly */
+	if (*rel != '/')
+		return IS_OUTSIDE_CONE;
+	rel++;
+
+	return fsmonitor_classify_path_workdir_relative(rel);
+}
+
+enum fsmonitor_path_type fsmonitor_classify_path_absolute(
+	struct fsmonitor_daemon_state *state,
+	const char *path)
+{
+	const char *rel;
+	enum fsmonitor_path_type t;
+
+	t = try_classify_workdir_abs_path(state, path);
+	if (state->nr_paths_watching == 1)
+		return t;
+	if (t != IS_OUTSIDE_CONE)
+		return t;
+
+	if (fspathncmp(path, state->path_gitdir_watch.buf,
+		       state->path_gitdir_watch.len))
+		return IS_OUTSIDE_CONE;
+
+	rel = path + state->path_gitdir_watch.len;
+
+	if (!*rel)
+		return IS_GITDIR; /* it is the <gitdir> exactly */
+	if (*rel != '/')
+		return IS_OUTSIDE_CONE;
+	rel++;
+
+	return fsmonitor_classify_path_gitdir_relative(rel);
+}
+
 static void *fsmonitor_fs_listen__thread_proc(void *_state)
 {
 	struct fsmonitor_daemon_state *state = _state;
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
index 09e4a6fb6675..97ea3766e900 100644
--- a/fsmonitor--daemon.h
+++ b/fsmonitor--daemon.h
@@ -32,5 +32,66 @@  struct fsmonitor_daemon_state {
 	int test_client_delay_ms;
 };
 
+/*
+ * Pathname classifications.
+ *
+ * The daemon classifies the pathnames that it receives from file
+ * system notification events into the following categories and uses
+ * that to decide whether clients are told about them.  (And to watch
+ * for file system synchronization events.)
+ *
+ * The client should only care about paths within the working
+ * directory proper (inside the working directory and not ".git" nor
+ * inside of ".git/").  That is, the client has read the index and is
+ * asking for a list of any paths in the working directory that have
+ * been modified since the last token.  The client does not care about
+ * file system changes within the .git directory (such as new loose
+ * objects or packfiles).  So the client will only receive paths that
+ * are classified as IS_WORKDIR_PATH.
+ *
+ * The daemon uses the IS_DOT_GIT and IS_GITDIR internally to mean the
+ * exact ".git" directory or GITDIR.  If the daemon receives a delete
+ * event for either of these directories, it will automatically
+ * shutdown, for example.
+ *
+ * Note that the daemon DOES NOT explicitly watch nor special case the
+ * ".git/index" file.  The daemon does not read the index and does not
+ * have any internal index-relative state.  The daemon only collects
+ * the set of modified paths within the working directory.
+ */
+enum fsmonitor_path_type {
+	IS_WORKDIR_PATH = 0,
+
+	IS_DOT_GIT,
+	IS_INSIDE_DOT_GIT,
+	IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX,
+
+	IS_GITDIR,
+	IS_INSIDE_GITDIR,
+	IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX,
+
+	IS_OUTSIDE_CONE,
+};
+
+/*
+ * Classify a pathname relative to the root of the working directory.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
+	const char *relative_path);
+
+/*
+ * Classify a pathname relative to a <gitdir> that is external to the
+ * worktree directory.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
+	const char *relative_path);
+
+/*
+ * Classify an absolute pathname received from a filesystem event.
+ */
+enum fsmonitor_path_type fsmonitor_classify_path_absolute(
+	struct fsmonitor_daemon_state *state,
+	const char *path);
+
 #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
 #endif /* FSMONITOR_DAEMON_H */