diff mbox series

[v8,12/15] bugreport: count loose objects

Message ID 20200220015858.181086-13-emilyshaffer@google.com (mailing list archive)
State New, archived
Headers show
Series add git-bugreport tool | expand

Commit Message

Emily Shaffer Feb. 20, 2020, 1:58 a.m. UTC
The number of unpacked objects in a user's repository may help us
understand the root of the problem they're seeing, especially if a
command is running unusually slowly.

Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
---
 Documentation/git-bugreport.txt |  1 +
 bugreport.c                     | 52 +++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

Comments

Junio C Hamano Feb. 20, 2020, 9:04 p.m. UTC | #1
Emily Shaffer <emilyshaffer@google.com> writes:

> The number of unpacked objects in a user's repository may help us
> understand the root of the problem they're seeing, especially if a
> command is running unusually slowly.
>
> Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
> Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
> ---
>  Documentation/git-bugreport.txt |  1 +
>  bugreport.c                     | 52 +++++++++++++++++++++++++++++++++
>  2 files changed, 53 insertions(+)
>
> diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt
> index 4d01528540..4fe1c60506 100644
> --- a/Documentation/git-bugreport.txt
> +++ b/Documentation/git-bugreport.txt
> @@ -32,6 +32,7 @@ The following information is captured automatically:
>   - $SHELL
>   - Selected config values
>   - A list of enabled hooks
> + - The number of loose objects in the repository
>  
>  This tool is invoked via the typical Git setup process, which means that in some
>  cases, it might not be able to launch - for example, if a relevant config file
> diff --git a/bugreport.c b/bugreport.c
> index b5a0714a7f..fb7bc72723 100644
> --- a/bugreport.c
> +++ b/bugreport.c
> @@ -10,6 +10,7 @@
>  #include "bugreport-config-safelist.h"
>  #include "khash.h"
>  #include "run-command.h"
> +#include "object-store.h"
>  
>  static void get_git_remote_https_version_info(struct strbuf *version_info)
>  {
> @@ -128,6 +129,54 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit)
>  	}
>  }
>  
> +static int loose_object_cb(const struct object_id *oid, const char *path,
> +			   void *data) {
> +	int *loose_object_count = data;
> +
> +	if (loose_object_count) {
> +		(*loose_object_count)++;
> +		return 0;
> +	}
> +
> +	return 1;

What is the point of returning 1 here to abort the iteration early?
Wouldn't it be a BUG() if this callback ends up gettting called with
NULL in data?

> +}
> +
> +static void get_loose_object_summary(struct strbuf *obj_info, int nongit) {
> +
> +	int local_loose_object_count = 0, total_loose_object_count = 0;
> +	int local_count_questionable = 0, total_count_questionable = 0;
> +
> +	if (nongit) {
> +		strbuf_addstr(obj_info,
> +			"not run from a git repository - no objects to show\n");
> +		return;
> +	}
> +
> +	local_count_questionable = for_each_loose_object(
> +					loose_object_cb,
> +					&local_loose_object_count,
> +					FOR_EACH_OBJECT_LOCAL_ONLY);
> +
> +	total_count_questionable = for_each_loose_object(
> +					loose_object_cb,
> +					&total_loose_object_count,
> +					0);
> +	strbuf_addf(obj_info, "%d local loose objects%s\n",
> +		    local_loose_object_count,
> +		    local_count_questionable ? " (problem during count)" : "");
> +
> +	strbuf_addf(obj_info, "%d alternate loose objects%s\n",
> +		    total_loose_object_count - local_loose_object_count,
> +		    (local_count_questionable || total_count_questionable)
> +			? " (problem during count)"
> +			: "");
> +
> +	strbuf_addf(obj_info, "%d total loose objects%s\n",
> +		    total_loose_object_count,
> +		    total_count_questionable ? " (problem during count)" : "");
> +}
> +
>  static const char * const bugreport_usage[] = {
>  	N_("git bugreport [-o|--output-directory <file>] [-s|--suffix <format>]"),
>  	NULL
> @@ -219,6 +268,9 @@ int cmd_main(int argc, const char **argv)
>  	get_header(&buffer, "Enabled Hooks");
>  	get_populated_hooks(&buffer, nongit_ok);
>  
> +	get_header(&buffer, "Loose Object Counts");
> +	get_loose_object_summary(&buffer, nongit_ok);
> +
>  	report = fopen_for_writing(report_path.buf);
>  
>  	if (report == NULL) {
Emily Shaffer Feb. 25, 2020, 11:22 p.m. UTC | #2
On Thu, Feb 20, 2020 at 01:04:33PM -0800, Junio C Hamano wrote:
> Emily Shaffer <emilyshaffer@google.com> writes:
> 
> > The number of unpacked objects in a user's repository may help us
> > understand the root of the problem they're seeing, especially if a
> > command is running unusually slowly.
> >
> > Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
> > Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
> > ---
> >  Documentation/git-bugreport.txt |  1 +
> >  bugreport.c                     | 52 +++++++++++++++++++++++++++++++++
> >  2 files changed, 53 insertions(+)
> >
> > diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt
> > index 4d01528540..4fe1c60506 100644
> > --- a/Documentation/git-bugreport.txt
> > +++ b/Documentation/git-bugreport.txt
> > @@ -32,6 +32,7 @@ The following information is captured automatically:
> >   - $SHELL
> >   - Selected config values
> >   - A list of enabled hooks
> > + - The number of loose objects in the repository
> >  
> >  This tool is invoked via the typical Git setup process, which means that in some
> >  cases, it might not be able to launch - for example, if a relevant config file
> > diff --git a/bugreport.c b/bugreport.c
> > index b5a0714a7f..fb7bc72723 100644
> > --- a/bugreport.c
> > +++ b/bugreport.c
> > @@ -10,6 +10,7 @@
> >  #include "bugreport-config-safelist.h"
> >  #include "khash.h"
> >  #include "run-command.h"
> > +#include "object-store.h"
> >  
> >  static void get_git_remote_https_version_info(struct strbuf *version_info)
> >  {
> > @@ -128,6 +129,54 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit)
> >  	}
> >  }
> >  
> > +static int loose_object_cb(const struct object_id *oid, const char *path,
> > +			   void *data) {
> > +	int *loose_object_count = data;
> > +
> > +	if (loose_object_count) {
> > +		(*loose_object_count)++;
> > +		return 0;
> > +	}
> > +
> > +	return 1;
> 
> What is the point of returning 1 here to abort the iteration early?
> Wouldn't it be a BUG() if this callback ends up gettting called with
> NULL in data?

Hrm. Wouldn't BUG() throw away the rest of the generated report?

Maybe it's better, in NULL case, to indicate an error occurred there and
write that information into the report, and continue gathering the rest
of the info.
Emily Shaffer Feb. 25, 2020, 11:26 p.m. UTC | #3
On Tue, Feb 25, 2020 at 03:22:00PM -0800, Emily Shaffer wrote:
> On Thu, Feb 20, 2020 at 01:04:33PM -0800, Junio C Hamano wrote:
> > Emily Shaffer <emilyshaffer@google.com> writes:
> > 
> > > The number of unpacked objects in a user's repository may help us
> > > understand the root of the problem they're seeing, especially if a
> > > command is running unusually slowly.
> > >
> > > Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
> > > Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
> > > ---
> > >  Documentation/git-bugreport.txt |  1 +
> > >  bugreport.c                     | 52 +++++++++++++++++++++++++++++++++
> > >  2 files changed, 53 insertions(+)
> > >
> > > diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt
> > > index 4d01528540..4fe1c60506 100644
> > > --- a/Documentation/git-bugreport.txt
> > > +++ b/Documentation/git-bugreport.txt
> > > @@ -32,6 +32,7 @@ The following information is captured automatically:
> > >   - $SHELL
> > >   - Selected config values
> > >   - A list of enabled hooks
> > > + - The number of loose objects in the repository
> > >  
> > >  This tool is invoked via the typical Git setup process, which means that in some
> > >  cases, it might not be able to launch - for example, if a relevant config file
> > > diff --git a/bugreport.c b/bugreport.c
> > > index b5a0714a7f..fb7bc72723 100644
> > > --- a/bugreport.c
> > > +++ b/bugreport.c
> > > @@ -10,6 +10,7 @@
> > >  #include "bugreport-config-safelist.h"
> > >  #include "khash.h"
> > >  #include "run-command.h"
> > > +#include "object-store.h"
> > >  
> > >  static void get_git_remote_https_version_info(struct strbuf *version_info)
> > >  {
> > > @@ -128,6 +129,54 @@ static void get_populated_hooks(struct strbuf *hook_info, int nongit)
> > >  	}
> > >  }
> > >  
> > > +static int loose_object_cb(const struct object_id *oid, const char *path,
> > > +			   void *data) {
> > > +	int *loose_object_count = data;
> > > +
> > > +	if (loose_object_count) {
> > > +		(*loose_object_count)++;
> > > +		return 0;
> > > +	}
> > > +
> > > +	return 1;
> > 
> > What is the point of returning 1 here to abort the iteration early?
> > Wouldn't it be a BUG() if this callback ends up gettting called with
> > NULL in data?
> 
> Hrm. Wouldn't BUG() throw away the rest of the generated report?
> 
> Maybe it's better, in NULL case, to indicate an error occurred there and
> write that information into the report, and continue gathering the rest
> of the info.

Oh, hm. Now that I look twice, you're saying "bugreport gave its own
callback a NULL". I guess it could also be that
for_each_loose_object() decided to give a NULL context instead of the
one we gave...

Even so, I dislike the idea of BUG() called from the tool you are trying
to use to report a BUG() message. We do already report if 1 was returned
"(problem during count)". I suppose I can try and make a more specific
error if we know the callback data was definitely the problem.

 - Emily
diff mbox series

Patch

diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt
index 4d01528540..4fe1c60506 100644
--- a/Documentation/git-bugreport.txt
+++ b/Documentation/git-bugreport.txt
@@ -32,6 +32,7 @@  The following information is captured automatically:
  - $SHELL
  - Selected config values
  - A list of enabled hooks
+ - The number of loose objects in the repository
 
 This tool is invoked via the typical Git setup process, which means that in some
 cases, it might not be able to launch - for example, if a relevant config file
diff --git a/bugreport.c b/bugreport.c
index b5a0714a7f..fb7bc72723 100644
--- a/bugreport.c
+++ b/bugreport.c
@@ -10,6 +10,7 @@ 
 #include "bugreport-config-safelist.h"
 #include "khash.h"
 #include "run-command.h"
+#include "object-store.h"
 
 static void get_git_remote_https_version_info(struct strbuf *version_info)
 {
@@ -128,6 +129,54 @@  static void get_populated_hooks(struct strbuf *hook_info, int nongit)
 	}
 }
 
+static int loose_object_cb(const struct object_id *oid, const char *path,
+			   void *data) {
+	int *loose_object_count = data;
+
+	if (loose_object_count) {
+		(*loose_object_count)++;
+		return 0;
+	}
+
+	return 1;
+}
+
+static void get_loose_object_summary(struct strbuf *obj_info, int nongit) {
+
+	int local_loose_object_count = 0, total_loose_object_count = 0;
+	int local_count_questionable = 0, total_count_questionable = 0;
+
+	if (nongit) {
+		strbuf_addstr(obj_info,
+			"not run from a git repository - no objects to show\n");
+		return;
+	}
+
+	local_count_questionable = for_each_loose_object(
+					loose_object_cb,
+					&local_loose_object_count,
+					FOR_EACH_OBJECT_LOCAL_ONLY);
+
+	total_count_questionable = for_each_loose_object(
+					loose_object_cb,
+					&total_loose_object_count,
+					0);
+
+	strbuf_addf(obj_info, "%d local loose objects%s\n",
+		    local_loose_object_count,
+		    local_count_questionable ? " (problem during count)" : "");
+
+	strbuf_addf(obj_info, "%d alternate loose objects%s\n",
+		    total_loose_object_count - local_loose_object_count,
+		    (local_count_questionable || total_count_questionable)
+			? " (problem during count)"
+			: "");
+
+	strbuf_addf(obj_info, "%d total loose objects%s\n",
+		    total_loose_object_count,
+		    total_count_questionable ? " (problem during count)" : "");
+}
+
 static const char * const bugreport_usage[] = {
 	N_("git bugreport [-o|--output-directory <file>] [-s|--suffix <format>]"),
 	NULL
@@ -219,6 +268,9 @@  int cmd_main(int argc, const char **argv)
 	get_header(&buffer, "Enabled Hooks");
 	get_populated_hooks(&buffer, nongit_ok);
 
+	get_header(&buffer, "Loose Object Counts");
+	get_loose_object_summary(&buffer, nongit_ok);
+
 	report = fopen_for_writing(report_path.buf);
 
 	if (report == NULL) {