diff mbox series

[v4,12/15] bugreport: count loose objects

Message ID 20191213004312.169753-13-emilyshaffer@google.com (mailing list archive)
State New, archived
Headers show
Series [v4,01/15] bugreport: add tool to generate debugging info | expand

Commit Message

Emily Shaffer Dec. 13, 2019, 12:43 a.m. UTC
The number of unpacked objects in a user's repository may help us
understand the root of the problem they're seeing, especially if a
command is running unusually slowly.

Rather than directly invoking 'git-count-objects', which may sometimes
fail unexpectedly on Git for Windows, manually count the contents of
.git/objects. Additionally, since we may wish to inspect other
directories' contents for bugreport in the future, put the directory
listing into a helper function.

Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
---
 bugreport.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

Comments

Junio C Hamano Dec. 13, 2019, 9:51 p.m. UTC | #1
Emily Shaffer <emilyshaffer@google.com> writes:

> The number of unpacked objects in a user's repository may help us
> understand the root of the problem they're seeing, especially if a
> command is running unusually slowly.
>
> Rather than directly invoking 'git-count-objects', which may sometimes
> fail unexpectedly on Git for Windows, manually count the contents of
> .git/objects.

Is for_each_loose_object() or for_each_loose_file_in_objdir() not
sufficient?  We really do *not* want a redundant implementation of
something we already use elsewhere in the system, especially for the
purpose of this program, because you would end up reporting what you
computed in a way that may be quite different from what the rest of
the system that is actually used by the end users computes.
Emily Shaffer Dec. 16, 2019, 11:54 p.m. UTC | #2
On Fri, Dec 13, 2019 at 01:51:27PM -0800, Junio C Hamano wrote:
> Emily Shaffer <emilyshaffer@google.com> writes:
> 
> > The number of unpacked objects in a user's repository may help us
> > understand the root of the problem they're seeing, especially if a
> > command is running unusually slowly.
> >
> > Rather than directly invoking 'git-count-objects', which may sometimes
> > fail unexpectedly on Git for Windows, manually count the contents of
> > .git/objects.
> 
> Is for_each_loose_object() or for_each_loose_file_in_objdir() not
> sufficient?  We really do *not* want a redundant implementation of
> something we already use elsewhere in the system, especially for the
> purpose of this program, because you would end up reporting what you
> computed in a way that may be quite different from what the rest of
> the system that is actually used by the end users computes.
> 
Oh, thanks for the pointer. That looks sufficient - I will investigate
the differences between the two and stop filesystem browsing by hand in
this case.

 - Emily
diff mbox series

Patch

diff --git a/bugreport.c b/bugreport.c
index f89cb8d754..3abb83d77f 100644
--- a/bugreport.c
+++ b/bugreport.c
@@ -173,6 +173,67 @@  static void get_populated_hooks(struct strbuf *hook_info)
 	}
 }
 
+/*
+ * Return 1 if each of the first `count` bytes of `string` is a
+ * hexadecimal digit, 0 otherwise. A `count` of 0 yields 1.
+ *
+ * Scanning stops at the first byte that is not a hex digit, so a NUL
+ * terminator inside the first `count` bytes makes the result 0.
+ */
+static int is_hex(const char *string, size_t count)
+{
+	for (; count; string++, count--) {
+		/* ctype classifiers expect a value in unsigned char range */
+		if (!isxdigit((unsigned char)*string))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Append a per-directory summary of the object directory to `obj_info`.
+ *
+ * For every entry directly under the object directory whose name is
+ * exactly two hex digits and which can be opened as a directory, one
+ * line "<name>: <count>\n" is appended, where <count> is the number of
+ * regular files found in that directory.
+ *
+ * If the object directory itself cannot be opened, a single diagnostic
+ * line is appended instead. Two-hex-digit entries that cannot be opened
+ * as directories are skipped without a diagnostic.
+ */
+static void get_loose_object_summary(struct strbuf *obj_info)
+{
+	struct dirent *d = NULL;
+	DIR *dir, *subdir = NULL;
+	size_t dir_len, subdir_len;
+	struct strbuf dirpath = STRBUF_INIT;
+
+	strbuf_addstr(&dirpath, get_object_directory());
+	strbuf_complete(&dirpath, '/');
+
+	dir = opendir(dirpath.buf);
+	if (!dir) {
+		strbuf_addf(obj_info, "could not open object directory '%s'\n",
+			    dirpath.buf);
+		strbuf_release(&dirpath);
+		return;
+	}
+
+	/* remember where the object directory prefix ends so it can be reused */
+	dir_len = dirpath.len;
+
+	while ((d = readdir(dir))) {
+		int object_count = 0;
+		char subdir_name[3];
+
+		/*
+		 * readdir() may report DT_UNKNOWN when the filesystem does
+		 * not provide the entry type; in that case let opendir()
+		 * below decide whether this really is a directory.
+		 */
+		if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN)
+			continue;
+
+		if ((strlen(d->d_name) != 2) || (!is_hex(d->d_name, 2)))
+			continue;
+
+		/*
+		 * copy directory name + \0; `d` is reused by the inner
+		 * loop below, so the name must be saved for the report
+		 */
+		memcpy(subdir_name, d->d_name, 3);
+
+		strbuf_setlen(&dirpath, dir_len);
+		strbuf_addstr(&dirpath, d->d_name);
+
+		subdir = opendir(dirpath.buf);
+		if (!subdir)
+			continue;
+
+		strbuf_complete(&dirpath, '/');
+		subdir_len = dirpath.len;
+
+		while ((d = readdir(subdir))) {
+			struct stat st;
+
+			if (d->d_type == DT_REG) {
+				object_count++;
+				continue;
+			}
+			if (d->d_type != DT_UNKNOWN)
+				continue;
+
+			/* entry type not reported; ask the filesystem */
+			strbuf_setlen(&dirpath, subdir_len);
+			strbuf_addstr(&dirpath, d->d_name);
+			if (!lstat(dirpath.buf, &st) && S_ISREG(st.st_mode))
+				object_count++;
+		}
+
+		closedir(subdir);
+
+		strbuf_addf(obj_info, "%s: %d\n", subdir_name, object_count);
+	}
+
+	closedir(dir);
+	strbuf_release(&dirpath);
+}
+
 static const char * const bugreport_usage[] = {
 	N_("git bugreport [-o|--output <file>]"),
 	NULL
@@ -243,6 +304,9 @@  int cmd_main(int argc, const char **argv)
 	get_header(&buffer, "Configured Hooks");
 	get_populated_hooks(&buffer);
 
+	get_header(&buffer, "Loose Object Counts");
+	get_loose_object_summary(&buffer);
+
 	report = fopen_for_writing(report_path.buf);
 	strbuf_write(&buffer, report);
 	fclose(report);