diff mbox series

[v8,13/15] bugreport: add packed object summary

Message ID 20200220015858.181086-14-emilyshaffer@google.com (mailing list archive)
State New, archived
Headers show
Series add git-bugreport tool | expand

Commit Message

Emily Shaffer Feb. 20, 2020, 1:58 a.m. UTC
Alongside the loose object counts, it can be useful to show the number
of packs and packed objects. This way we can check whether the repo has
an appropriate ratio of packed to loose objects to help determine
whether it's behaving correctly.

Add a utility to easily traverse all packfiles in a given repository.
Use it in packfile.c and remove a redundant call to
prepare_packed_git(), which is already called in get_all_packs().

Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
---
 Documentation/git-bugreport.txt |  1 +
 bugreport.c                     | 30 ++++++++++++++++++++++++++++++
 object-store.h                  |  6 ++++++
 packfile.c                      |  3 +--
 4 files changed, 38 insertions(+), 2 deletions(-)

Comments

Junio C Hamano Feb. 20, 2020, 10:04 p.m. UTC | #1
Emily Shaffer <emilyshaffer@google.com> writes:

> +static void get_packed_object_summary(struct strbuf *obj_info, int nongit)
> +{
> +	struct packed_git *pack = NULL;
> +	int pack_count = 0;
> +	int object_count = 0;
> +
> +	if (nongit) {
> +		strbuf_addstr(obj_info,
> +			"not run from a git repository - no objects to show\n");
> +		return;
> +	}
> +
> +	for_each_pack(the_repository, pack) {
> +		pack_count++;
> +		/*
> +		 * To accurately count how many objects are packed, look inside
> +		 * the packfile's index.
> +		 */
> +		open_pack_index(pack);
> +		object_count += pack->num_objects;
> +	}
> +
> +	strbuf_addf(obj_info, "%d total packs (%d objects)\n", pack_count,
> +		    object_count);
> +
> +}

Makes sense.

> @@ -447,4 +448,9 @@ int for_each_object_in_pack(struct packed_git *p,
>  int for_each_packed_object(each_packed_object_fn, void *,
>  			   enum for_each_object_flags flags);
>  
> +#define for_each_pack(repo, pack) 		\
> +		for (pack = get_all_packs(repo);\
> +		     pack;			\
> +		     pack = pack->next)

I generally avoid #define'ing a control loop pseudo-syntax unless it
makes the resulting code hide overly ugly implementation detail.

for_each_string_list() is there to hide the fact that items are
stored in an embedded array whose name is .items and size is .nr
that is sufficiently ugnly to expose, but iterating over packs
does not look so bad.

If you MUST have this as a pseudo-syntax macro, we need

 - to match for_each_string_list_item(), have iterating variable
   'pack' as the first parameter, and the scope of what's iterated
   'repo' as the second.

 - to make sure the syntax works correctly even if a parameter is
   *not* a simple identifier (I think the above is OK, but there may
   be cases that it does not work well).

Regarding the latter, the way 'item' is incremented at the end of
iteration in for_each_string_list_item() is subtle and correct.

#define for_each_string_list_item(item,list)            \
	for (item = (list)->items;                      \
	     item && item < (list)->items + (list)->nr; \
	     ++item)

You would break it if you changed pre-increment to post-increment
for a user like this:

	struct string_list *list;
	struct string_list_item *i, **p;
	p = &i;

	for_each_string_list_item(*p, list) {
		...
	}

because ++*p is ++(*p), while *p++ is (*p)++, and we do want the
former (i.e. increment the memory cell pointed at by pointer p).

Personally, I would prefer not to introduce this macro if I were
working on this topic.

>  #endif /* OBJECT_STORE_H */
> diff --git a/packfile.c b/packfile.c
> index 99dd1a7d09..95afcc1187 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -2095,8 +2095,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
>  	int r = 0;
>  	int pack_errors = 0;
>  
> -	prepare_packed_git(the_repository);
> -	for (p = get_all_packs(the_repository); p; p = p->next) {
> +	for_each_pack(the_repository, p) {
>  		if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
>  			continue;
>  		if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
Emily Shaffer Feb. 25, 2020, 11:58 p.m. UTC | #2
On Thu, Feb 20, 2020 at 02:04:52PM -0800, Junio C Hamano wrote:
> Emily Shaffer <emilyshaffer@google.com> writes:
> 
> > +static void get_packed_object_summary(struct strbuf *obj_info, int nongit)
> > +{
> > +	struct packed_git *pack = NULL;
> > +	int pack_count = 0;
> > +	int object_count = 0;
> > +
> > +	if (nongit) {
> > +		strbuf_addstr(obj_info,
> > +			"not run from a git repository - no objects to show\n");
> > +		return;
> > +	}
> > +
> > +	for_each_pack(the_repository, pack) {
> > +		pack_count++;
> > +		/*
> > +		 * To accurately count how many objects are packed, look inside
> > +		 * the packfile's index.
> > +		 */
> > +		open_pack_index(pack);
> > +		object_count += pack->num_objects;
> > +	}
> > +
> > +	strbuf_addf(obj_info, "%d total packs (%d objects)\n", pack_count,
> > +		    object_count);
> > +
> > +}
> 
> Makes sense.
> 
> > @@ -447,4 +448,9 @@ int for_each_object_in_pack(struct packed_git *p,
> >  int for_each_packed_object(each_packed_object_fn, void *,
> >  			   enum for_each_object_flags flags);
> >  
> > +#define for_each_pack(repo, pack) 		\
> > +		for (pack = get_all_packs(repo);\
> > +		     pack;			\
> > +		     pack = pack->next)
> 
> I generally avoid #define'ing a control loop pseudo-syntax unless it
> makes the resulting code hide overly ugly implementation detail.
> 
> for_each_string_list() is there to hide the fact that items are
> stored in an embedded array whose name is .items and size is .nr
> that is sufficiently ugnly to expose, but iterating over packs
> does not look so bad.
> 
> If you MUST have this as a pseudo-syntax macro, we need
> 
>  - to match for_each_string_list_item(), have iterating variable
>    'pack' as the first parameter, and the scope of what's iterated
>    'repo' as the second.
> 
>  - to make sure the syntax works correctly even if a parameter is
>    *not* a simple identifier (I think the above is OK, but there may
>    be cases that it does not work well).
> 
> Regarding the latter, the way 'item' is incremented at the end of
> iteration in for_each_string_list_item() is subtle and correct.
> 
> #define for_each_string_list_item(item,list)            \
> 	for (item = (list)->items;                      \
> 	     item && item < (list)->items + (list)->nr; \
> 	     ++item)
> 
> You would break it if you changed pre-increment to post-increment
> for a user like this:
> 
> 	struct string_list *list;
> 	struct string_list_item *i, **p;
> 	p = &i;
> 
> 	for_each_string_list_item(*p, list) {
> 		...
> 	}
> 
> because ++*p is ++(*p), while *p++ is (*p)++, and we do want the
> former (i.e. increment the memory cell pointed at by pointer p).
> 
> Personally, I would prefer not to introduce this macro if I were
> working on this topic.

Ah, I thought this is the kind of thing you meant here[1]. But I agree
with what you point out about the shortcomings of this kind of macro.
The implementation details are not so ugly; I'll drop it.

 - Emily

[1] https://lore.kernel.org/git/xmqq8sli89eu.fsf@gitster-ct.c.googlers.com

> 
> >  #endif /* OBJECT_STORE_H */
> > diff --git a/packfile.c b/packfile.c
> > index 99dd1a7d09..95afcc1187 100644
> > --- a/packfile.c
> > +++ b/packfile.c
> > @@ -2095,8 +2095,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
> >  	int r = 0;
> >  	int pack_errors = 0;
> >  
> > -	prepare_packed_git(the_repository);
> > -	for (p = get_all_packs(the_repository); p; p = p->next) {
> > +	for_each_pack(the_repository, p) {
> >  		if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
> >  			continue;
> >  		if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
diff mbox series

Patch

diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt
index 4fe1c60506..eb41f0677f 100644
--- a/Documentation/git-bugreport.txt
+++ b/Documentation/git-bugreport.txt
@@ -33,6 +33,7 @@  The following information is captured automatically:
  - Selected config values
  - A list of enabled hooks
  - The number of loose objects in the repository
+ - The number of packs and packed objects in the repository
 
 This tool is invoked via the typical Git setup process, which means that in some
 cases, it might not be able to launch - for example, if a relevant config file
diff --git a/bugreport.c b/bugreport.c
index fb7bc72723..71191f1331 100644
--- a/bugreport.c
+++ b/bugreport.c
@@ -177,6 +177,33 @@  static void get_loose_object_summary(struct strbuf *obj_info, int nongit) {
 		    total_count_questionable ? " (problem during count)" : "");
 }
 
+static void get_packed_object_summary(struct strbuf *obj_info, int nongit)
+{
+	struct packed_git *pack = NULL;
+	int pack_count = 0;
+	int object_count = 0;
+
+	if (nongit) {
+		strbuf_addstr(obj_info,
+			"not run from a git repository - no objects to show\n");
+		return;
+	}
+
+	for_each_pack(the_repository, pack) {
+		pack_count++;
+		/*
+		 * To accurately count how many objects are packed, look inside
+		 * the packfile's index.
+		 */
+		open_pack_index(pack);
+		object_count += pack->num_objects;
+	}
+
+	strbuf_addf(obj_info, "%d total packs (%d objects)\n", pack_count,
+		    object_count);
+
+}
+
 static const char * const bugreport_usage[] = {
 	N_("git bugreport [-o|--output-directory <file>] [-s|--suffix <format>]"),
 	NULL
@@ -271,6 +298,9 @@  int cmd_main(int argc, const char **argv)
 	get_header(&buffer, "Loose Object Counts");
 	get_loose_object_summary(&buffer, nongit_ok);
 
+	get_header(&buffer, "Packed Object Summary");
+	get_packed_object_summary(&buffer, nongit_ok);
+
 	report = fopen_for_writing(report_path.buf);
 
 	if (report == NULL) {
diff --git a/object-store.h b/object-store.h
index 5b047637e3..f881a1756e 100644
--- a/object-store.h
+++ b/object-store.h
@@ -7,6 +7,7 @@ 
 #include "sha1-array.h"
 #include "strbuf.h"
 #include "thread-utils.h"
+#include "packfile.h"
 
 struct object_directory {
 	struct object_directory *next;
@@ -447,4 +448,9 @@  int for_each_object_in_pack(struct packed_git *p,
 int for_each_packed_object(each_packed_object_fn, void *,
 			   enum for_each_object_flags flags);
 
+#define for_each_pack(repo, pack) 		\
+		for (pack = get_all_packs(repo);\
+		     pack;			\
+		     pack = pack->next)
+
 #endif /* OBJECT_STORE_H */
diff --git a/packfile.c b/packfile.c
index 99dd1a7d09..95afcc1187 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2095,8 +2095,7 @@  int for_each_packed_object(each_packed_object_fn cb, void *data,
 	int r = 0;
 	int pack_errors = 0;
 
-	prepare_packed_git(the_repository);
-	for (p = get_all_packs(the_repository); p; p = p->next) {
+	for_each_pack(the_repository, p) {
 		if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
 			continue;
 		if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&