diff mbox series

[v2,3/6] Implement `scalar diagnose`

Message ID 0d570137bb6aef675f4f5d74d140ace1dfba5eb7.1644187146.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series scalar: implement the subcommand "diagnose" | expand

Commit Message

Johannes Schindelin Feb. 6, 2022, 10:39 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

Over the course of Scalar's development, it became obvious that there is
a need for a command that can gather all kinds of useful information
that can help identify the most typical problems with large
worktrees/repositories.

The `diagnose` command is the culmination of this hard-won knowledge: it
gathers the installed hooks, the config, a couple statistics describing
the data shape, among other pieces of information, and then wraps
everything up in a tidy, neat `.zip` archive.

Note: originally, Scalar was implemented in C# using the .NET API, where
we had the luxury of a comprehensive standard library that includes
basic functionality such as writing a `.zip` file. In the C version, we
lack such a commodity. Rather than introducing a dependency on, say,
libzip, we slightly abuse Git's `archive` command: Instead of writing
the `.zip` file directly, we stage the file contents in a Git index of a
temporary, bare repository, only to let `git archive` have at it, and
finally removing the temporary repository.

Also note: Due to the frequently-spawned `git hash-object` processes,
this command is quite a bit slow on Windows. Should it turn out to be a
big problem, the lack of a batch mode of the `hash-object` command could
potentially be worked around via using `git fast-import` with a crafted
`stdin`.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 contrib/scalar/scalar.c          | 143 +++++++++++++++++++++++++++++++
 contrib/scalar/scalar.txt        |  12 +++
 contrib/scalar/t/t9099-scalar.sh |  14 +++
 3 files changed, 169 insertions(+)

Comments

René Scharfe Feb. 7, 2022, 7:55 p.m. UTC | #1
Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> Over the course of Scalar's development, it became obvious that there is
> a need for a command that can gather all kinds of useful information
> that can help identify the most typical problems with large
> worktrees/repositories.
>
> The `diagnose` command is the culmination of this hard-won knowledge: it
> gathers the installed hooks, the config, a couple statistics describing
> the data shape, among other pieces of information, and then wraps
> everything up in a tidy, neat `.zip` archive.
>
> Note: originally, Scalar was implemented in C# using the .NET API, where
> we had the luxury of a comprehensive standard library that includes
> basic functionality such as writing a `.zip` file. In the C version, we
> lack such a commodity. Rather than introducing a dependency on, say,
> libzip, we slightly abuse Git's `archive` command: Instead of writing
> the `.zip` file directly, we stage the file contents in a Git index of a
> temporary, bare repository, only to let `git archive` have at it, and
> finally removing the temporary repository.
>
> Also note: Due to the frequently-spawned `git hash-object` processes,
> this command is quite a bit slow on Windows. Should it turn out to be a
> big problem, the lack of a batch mode of the `hash-object` command could
> potentially be worked around via using `git fast-import` with a crafted
> `stdin`.

The two paragraphs above are not in sync with the patch.

>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>  contrib/scalar/scalar.c          | 143 +++++++++++++++++++++++++++++++
>  contrib/scalar/scalar.txt        |  12 +++
>  contrib/scalar/t/t9099-scalar.sh |  14 +++
>  3 files changed, 169 insertions(+)
>
> diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c
> index 00dcd4b50ef..30ce0799c7a 100644
> --- a/contrib/scalar/scalar.c
> +++ b/contrib/scalar/scalar.c
> @@ -11,6 +11,7 @@
>  #include "dir.h"
>  #include "packfile.h"
>  #include "help.h"
> +#include "archive.h"
>
>  /*
>   * Remove the deepest subdirectory in the provided path string. Path must not
> @@ -261,6 +262,44 @@ static int unregister_dir(void)
>  	return res;
>  }
>
> +static int add_directory_to_archiver(struct strvec *archiver_args,
> +					  const char *path, int recurse)
> +{
> +	int at_root = !*path;
> +	DIR *dir = opendir(at_root ? "." : path);
> +	struct dirent *e;
> +	struct strbuf buf = STRBUF_INIT;
> +	size_t len;
> +	int res = 0;
> +
> +	if (!dir)
> +		return error(_("could not open directory '%s'"), path);
> +
> +	if (!at_root)
> +		strbuf_addf(&buf, "%s/", path);
> +	len = buf.len;
> +	strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
> +
> +	while (!res && (e = readdir(dir))) {
> +		if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
> +			continue;
> +
> +		strbuf_setlen(&buf, len);
> +		strbuf_addstr(&buf, e->d_name);
> +
> +		if (e->d_type == DT_REG)
> +			strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
> +		else if (e->d_type != DT_DIR)
> +			res = -1;
> +		else if (recurse)
> +		     add_directory_to_archiver(archiver_args, buf.buf, recurse);
> +	}
> +
> +	closedir(dir);
> +	strbuf_release(&buf);
> +	return res;
> +}
> +
>  /* printf-style interface, expects `<key>=<value>` argument */
>  static int set_config(const char *fmt, ...)
>  {
> @@ -501,6 +540,109 @@ cleanup:
>  	return res;
>  }
>
> +static int cmd_diagnose(int argc, const char **argv)
> +{
> +	struct option options[] = {
> +		OPT_END(),
> +	};
> +	const char * const usage[] = {
> +		N_("scalar diagnose [<enlistment>]"),
> +		NULL
> +	};
> +	struct strbuf zip_path = STRBUF_INIT;
> +	struct strvec archiver_args = STRVEC_INIT;
> +	char **argv_copy = NULL;
> +	int stdout_fd = -1, archiver_fd = -1;
> +	time_t now = time(NULL);
> +	struct tm tm;
> +	struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT;
> +	size_t off;
> +	int res = 0;
> +
> +	argc = parse_options(argc, argv, NULL, options,
> +			     usage, 0);
> +
> +	setup_enlistment_directory(argc, argv, usage, options, &zip_path);
> +
> +	strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_");
> +	strbuf_addftime(&zip_path,
> +			"%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0);
> +	strbuf_addstr(&zip_path, ".zip");
> +	switch (safe_create_leading_directories(zip_path.buf)) {
> +	case SCLD_EXISTS:
> +	case SCLD_OK:
> +		break;
> +	default:
> +		error_errno(_("could not create directory for '%s'"),
> +			    zip_path.buf);
> +		goto diagnose_cleanup;
> +	}
> +	stdout_fd = dup(1);
> +	if (stdout_fd < 0) {
> +		res = error_errno(_("could not duplicate stdout"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
> +	if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) {
> +		res = error_errno(_("could not redirect output"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	init_zip_archiver();
> +	strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL);
> +
> +	strbuf_reset(&buf);
> +	strbuf_addstr(&buf,
> +		      "--add-file-with-content=diagnostics.log:"
> +		      "Collecting diagnostic info\n\n");
> +	get_version_info(&buf, 1);
> +
> +	strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree);
> +	off = strchr(buf.buf, ':') + 1 - buf.buf;
> +	write_or_die(stdout_fd, buf.buf + off, buf.len - off);
> +	strvec_push(&archiver_args, buf.buf);

Fun trick to reuse the buffer for both the ZIP entry and stdout. :)  I'd
have omitted the option from buf and added it like this, for simplicity:

	strvec_pushf(&archiver_args,
		     "--add-file-with-content=diagnostics.log:%s", buf.buf);

Just a thought.

> +
> +	if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0)))
> +		goto diagnose_cleanup;
> +
> +	strvec_pushl(&archiver_args, "--prefix=",
> +		     oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
> +
> +	/* `write_archive()` modifies the `argv` passed to it. Let it. */
> +	argv_copy = xmemdupz(archiver_args.v,
> +			     sizeof(char *) * archiver_args.nr);

Leaking the whole thing would be fine as well for this command, but
cleaning up is tidier, of course.

> +	res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
> +			    the_repository, NULL, 0);

Ah -- no shell means no command line length limits. :)

> +	if (res) {
> +		error(_("failed to write archive"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	if (!res)
> +		printf("\n"
> +		       "Diagnostics complete.\n"
> +		       "All of the gathered info is captured in '%s'\n",
> +		       zip_path.buf);

Is this message appended to the ZIP file or does it go to stdout?

In any case: mixing write(2) and stdio(3) is not a good idea.  Using
fwrite(3) instead of write_or_die above and doing the stdout dup(2)
dance only tightly around the write_archive call would help, I think.

> +
> +diagnose_cleanup:
> +	if (archiver_fd >= 0) {
> +		close(1);
> +		dup2(stdout_fd, 1);
> +	}
> +	free(argv_copy);
> +	strvec_clear(&archiver_args);
> +	strbuf_release(&zip_path);
> +	strbuf_release(&path);
> +	strbuf_release(&buf);
> +
> +	return res;
> +}
> +
>  static int cmd_list(int argc, const char **argv)
>  {
>  	if (argc != 1)
> @@ -802,6 +944,7 @@ static struct {
>  	{ "reconfigure", cmd_reconfigure },
>  	{ "delete", cmd_delete },
>  	{ "version", cmd_version },
> +	{ "diagnose", cmd_diagnose },
>  	{ NULL, NULL},
>  };
>
> diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt
> index f416d637289..22583fe046e 100644
> --- a/contrib/scalar/scalar.txt
> +++ b/contrib/scalar/scalar.txt
> @@ -14,6 +14,7 @@ scalar register [<enlistment>]
>  scalar unregister [<enlistment>]
>  scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
>  scalar reconfigure [ --all | <enlistment> ]
> +scalar diagnose [<enlistment>]
>  scalar delete <enlistment>
>
>  DESCRIPTION
> @@ -129,6 +130,17 @@ reconfigure the enlistment.
>  With the `--all` option, all enlistments currently registered with Scalar
>  will be reconfigured. Use this option after each Scalar upgrade.
>
> +Diagnose
> +~~~~~~~~
> +
> +diagnose [<enlistment>]::
> +    When reporting issues with Scalar, it is often helpful to provide the
> +    information gathered by this command, including logs and certain
> +    statistics describing the data shape of the current enlistment.
> ++
> +The output of this command is a `.zip` file that is written into
> +a directory adjacent to the worktree in the `src` directory.
> +
>  Delete
>  ~~~~~~
>
> diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh
> index 9d83fdf25e8..bbd07a44426 100755
> --- a/contrib/scalar/t/t9099-scalar.sh
> +++ b/contrib/scalar/t/t9099-scalar.sh
> @@ -90,4 +90,18 @@ test_expect_success '`scalar [...] <dir>` errors out when dir is missing' '
>  	grep "cloned. does not exist" err
>  '
>
> +SQ="'"
> +test_expect_success UNZIP 'scalar diagnose' '
> +	scalar clone "file://$(pwd)" cloned --single-branch &&
> +	scalar diagnose cloned >out &&
> +	sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path &&
> +	zip_path=$(cat zip_path) &&
> +	test -n "$zip_path" &&
> +	unzip -v "$zip_path" &&
> +	folder=${zip_path%.zip} &&
> +	test_path_is_missing "$folder" &&
> +	unzip -p "$zip_path" diagnostics.log >out &&
> +	test_file_not_empty out
> +'
> +
>  test_done
Johannes Schindelin Feb. 8, 2022, 12:08 p.m. UTC | #2
Hi René,

On Mon, 7 Feb 2022, René Scharfe wrote:

> > Note: originally, Scalar was implemented in C# using the .NET API, where
> > we had the luxury of a comprehensive standard library that includes
> > basic functionality such as writing a `.zip` file. In the C version, we
> > lack such a commodity. Rather than introducing a dependency on, say,
> > libzip, we slightly abuse Git's `archive` command: Instead of writing
> > the `.zip` file directly, we stage the file contents in a Git index of a
> > temporary, bare repository, only to let `git archive` have at it, and
> > finally removing the temporary repository.
> >
> > Also note: Due to the frequently-spawned `git hash-object` processes,
> > this command is quite a bit slow on Windows. Should it turn out to be a
> > big problem, the lack of a batch mode of the `hash-object` command could
> > potentially be worked around via using `git fast-import` with a crafted
> > `stdin`.
>
> The two paragraphs above are not in sync with the patch.

Whoopsie!

> > +	archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
> > +	if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) {
> > +		res = error_errno(_("could not redirect output"));
> > +		goto diagnose_cleanup;
> > +	}
> > +
> > +	init_zip_archiver();
> > +	strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL);
> > +
> > +	strbuf_reset(&buf);
> > +	strbuf_addstr(&buf,
> > +		      "--add-file-with-content=diagnostics.log:"
> > +		      "Collecting diagnostic info\n\n");
> > +	get_version_info(&buf, 1);
> > +
> > +	strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree);
> > +	off = strchr(buf.buf, ':') + 1 - buf.buf;
> > +	write_or_die(stdout_fd, buf.buf + off, buf.len - off);
> > +	strvec_push(&archiver_args, buf.buf);
>
> Fun trick to reuse the buffer for both the ZIP entry and stdout. :)  I'd
> have omitted the option from buf and added it like this, for simplicity:
>
> 	strvec_pushf(&archiver_args,
> 		     "--add-file-with-content=diagnostics.log:%s", buf.buf);
>
> Just a thought.

Oh, that's even better. I did not like that `off` pattern at all but
forgot to think of `pushf()`. Thanks!

> > +
> > +	if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) ||
> > +	    (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) ||
> > +	    (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) ||
> > +	    (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) ||
> > +	    (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0)))
> > +		goto diagnose_cleanup;
> > +
> > +	strvec_pushl(&archiver_args, "--prefix=",
> > +		     oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
> > +
> > +	/* `write_archive()` modifies the `argv` passed to it. Let it. */
> > +	argv_copy = xmemdupz(archiver_args.v,
> > +			     sizeof(char *) * archiver_args.nr);
>
> Leaking the whole thing would be fine as well for this command, but
> cleaning up is tidier, of course.
>
> > +	res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
> > +			    the_repository, NULL, 0);
>
> Ah -- no shell means no command line length limits. :)

Yes!!!

It also makes the command a ridiculous amount faster on Windows.

> > +	if (res) {
> > +		error(_("failed to write archive"));
> > +		goto diagnose_cleanup;
> > +	}
> > +
> > +	if (!res)
> > +		printf("\n"
> > +		       "Diagnostics complete.\n"
> > +		       "All of the gathered info is captured in '%s'\n",
> > +		       zip_path.buf);
>
> Is this message appended to the ZIP file or does it go to stdout?

It goes to `stdout`, this is for the user who runs `scalar diagnose`.

Hmm.

Now that you pointed it out, I think I want it to go to `stderr` instead.

> In any case: mixing write(2) and stdio(3) is not a good idea.  Using
> fwrite(3) instead of write_or_die above and doing the stdout dup(2)
> dance only tightly around the write_archive call would help, I think.

Sure, but let's print this message to `stderr` instead, that'll be much
cleaner, right?

Alternatively, I think I'd rather move the `printf()` below...

>
> > +
> > +diagnose_cleanup:
> > +	if (archiver_fd >= 0) {
> > +		close(1);
> > +		dup2(stdout_fd, 1);
> > +	}

... this re-redirection.

What do you think? `stdout` or `stderr`?

Thank you for your review!
Dscho
diff mbox series

Patch

diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c
index 00dcd4b50ef..30ce0799c7a 100644
--- a/contrib/scalar/scalar.c
+++ b/contrib/scalar/scalar.c
@@ -11,6 +11,7 @@ 
 #include "dir.h"
 #include "packfile.h"
 #include "help.h"
+#include "archive.h"
 
 /*
  * Remove the deepest subdirectory in the provided path string. Path must not
@@ -261,6 +262,44 @@  static int unregister_dir(void)
 	return res;
 }
 
+static int add_directory_to_archiver(struct strvec *archiver_args,
+					  const char *path, int recurse)
+{
+	int at_root = !*path;
+	DIR *dir = opendir(at_root ? "." : path);
+	struct dirent *e;
+	struct strbuf buf = STRBUF_INIT;
+	size_t len;
+	int res = 0;
+
+	if (!dir)
+		return error(_("could not open directory '%s'"), path);
+
+	if (!at_root)
+		strbuf_addf(&buf, "%s/", path);
+	len = buf.len;
+	strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
+
+	while (!res && (e = readdir(dir))) {
+		if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
+			continue;
+
+		strbuf_setlen(&buf, len);
+		strbuf_addstr(&buf, e->d_name);
+
+		if (e->d_type == DT_REG)
+			strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
+		else if (e->d_type != DT_DIR)
+			res = -1;
+		else if (recurse)
+		     add_directory_to_archiver(archiver_args, buf.buf, recurse);
+	}
+
+	closedir(dir);
+	strbuf_release(&buf);
+	return res;
+}
+
 /* printf-style interface, expects `<key>=<value>` argument */
 static int set_config(const char *fmt, ...)
 {
@@ -501,6 +540,109 @@  cleanup:
 	return res;
 }
 
+static int cmd_diagnose(int argc, const char **argv)
+{
+	struct option options[] = {
+		OPT_END(),
+	};
+	const char * const usage[] = {
+		N_("scalar diagnose [<enlistment>]"),
+		NULL
+	};
+	struct strbuf zip_path = STRBUF_INIT;
+	struct strvec archiver_args = STRVEC_INIT;
+	char **argv_copy = NULL;
+	int stdout_fd = -1, archiver_fd = -1;
+	time_t now = time(NULL);
+	struct tm tm;
+	struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT;
+	size_t off;
+	int res = 0;
+
+	argc = parse_options(argc, argv, NULL, options,
+			     usage, 0);
+
+	setup_enlistment_directory(argc, argv, usage, options, &zip_path);
+
+	strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_");
+	strbuf_addftime(&zip_path,
+			"%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0);
+	strbuf_addstr(&zip_path, ".zip");
+	switch (safe_create_leading_directories(zip_path.buf)) {
+	case SCLD_EXISTS:
+	case SCLD_OK:
+		break;
+	default:
+		error_errno(_("could not create directory for '%s'"),
+			    zip_path.buf);
+		goto diagnose_cleanup;
+	}
+	stdout_fd = dup(1);
+	if (stdout_fd < 0) {
+		res = error_errno(_("could not duplicate stdout"));
+		goto diagnose_cleanup;
+	}
+
+	archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
+	if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) {
+		res = error_errno(_("could not redirect output"));
+		goto diagnose_cleanup;
+	}
+
+	init_zip_archiver();
+	strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL);
+
+	strbuf_reset(&buf);
+	strbuf_addstr(&buf,
+		      "--add-file-with-content=diagnostics.log:"
+		      "Collecting diagnostic info\n\n");
+	get_version_info(&buf, 1);
+
+	strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree);
+	off = strchr(buf.buf, ':') + 1 - buf.buf;
+	write_or_die(stdout_fd, buf.buf + off, buf.len - off);
+	strvec_push(&archiver_args, buf.buf);
+
+	if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) ||
+	    (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) ||
+	    (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) ||
+	    (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) ||
+	    (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0)))
+		goto diagnose_cleanup;
+
+	strvec_pushl(&archiver_args, "--prefix=",
+		     oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
+
+	/* `write_archive()` modifies the `argv` passed to it. Let it. */
+	argv_copy = xmemdupz(archiver_args.v,
+			     sizeof(char *) * archiver_args.nr);
+	res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
+			    the_repository, NULL, 0);
+	if (res) {
+		error(_("failed to write archive"));
+		goto diagnose_cleanup;
+	}
+
+	if (!res)
+		printf("\n"
+		       "Diagnostics complete.\n"
+		       "All of the gathered info is captured in '%s'\n",
+		       zip_path.buf);
+
+diagnose_cleanup:
+	if (archiver_fd >= 0) {
+		close(1);
+		dup2(stdout_fd, 1);
+	}
+	free(argv_copy);
+	strvec_clear(&archiver_args);
+	strbuf_release(&zip_path);
+	strbuf_release(&path);
+	strbuf_release(&buf);
+
+	return res;
+}
+
 static int cmd_list(int argc, const char **argv)
 {
 	if (argc != 1)
@@ -802,6 +944,7 @@  static struct {
 	{ "reconfigure", cmd_reconfigure },
 	{ "delete", cmd_delete },
 	{ "version", cmd_version },
+	{ "diagnose", cmd_diagnose },
 	{ NULL, NULL},
 };
 
diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt
index f416d637289..22583fe046e 100644
--- a/contrib/scalar/scalar.txt
+++ b/contrib/scalar/scalar.txt
@@ -14,6 +14,7 @@  scalar register [<enlistment>]
 scalar unregister [<enlistment>]
 scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
 scalar reconfigure [ --all | <enlistment> ]
+scalar diagnose [<enlistment>]
 scalar delete <enlistment>
 
 DESCRIPTION
@@ -129,6 +130,17 @@  reconfigure the enlistment.
 With the `--all` option, all enlistments currently registered with Scalar
 will be reconfigured. Use this option after each Scalar upgrade.
 
+Diagnose
+~~~~~~~~
+
+diagnose [<enlistment>]::
+    When reporting issues with Scalar, it is often helpful to provide the
+    information gathered by this command, including logs and certain
+    statistics describing the data shape of the current enlistment.
++
+The output of this command is a `.zip` file that is written into
+a directory adjacent to the worktree in the `src` directory.
+
 Delete
 ~~~~~~
 
diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh
index 9d83fdf25e8..bbd07a44426 100755
--- a/contrib/scalar/t/t9099-scalar.sh
+++ b/contrib/scalar/t/t9099-scalar.sh
@@ -90,4 +90,18 @@  test_expect_success '`scalar [...] <dir>` errors out when dir is missing' '
 	grep "cloned. does not exist" err
 '
 
+SQ="'"
+test_expect_success UNZIP 'scalar diagnose' '
+	scalar clone "file://$(pwd)" cloned --single-branch &&
+	scalar diagnose cloned >out &&
+	sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path &&
+	zip_path=$(cat zip_path) &&
+	test -n "$zip_path" &&
+	unzip -v "$zip_path" &&
+	folder=${zip_path%.zip} &&
+	test_path_is_missing "$folder" &&
+	unzip -p "$zip_path" diagnostics.log >out &&
+	test_file_not_empty out
+'
+
 test_done