[2/6] commit-graph write: add more progress output
diff mbox series

Message ID 20181120150443.12997-3-avarab@gmail.com
State New
Headers show
Series
  • [1/6] commit-graph write: rephrase confusing progress output
Related show

Commit Message

Ævar Arnfjörð Bjarmason Nov. 20, 2018, 3:04 p.m. UTC
Add more progress output to the output already added in
7b0f229222 ("commit-graph write: add progress output", 2018-09-17).

As noted in that commit most of the progress output isn't displayed on
small repositories, but before this change we'd noticeably hang for
2-3 seconds at the end on medium sized repositories such as linux.git.

Now we'll instead show output like this, and have no human-observable
point at which we're not producing progress output:

    $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
    Finding commits for commit graph: 6418991, done.
    Computing commit graph generation numbers: 100% (797205/797205), done.
    Writing out commit graph chunks: 2399861, done.

This "graph chunks" number is not meant to be meaningful to the user,
but just to show that we're doing work and the command isn't
hanging.

On a much larger in-house repository I have we'll show (note how we
also say "Annotating[...]"):

    $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
    Finding commits for commit graph: 48271163, done.
    Annotating commit graph: 21424536, done.
    Computing commit graph generation numbers: 100% (7141512/7141512), done.
    Writing out commit graph chunks: 21424913, done.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 commit-graph.c | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

Comments

SZEDER Gábor Nov. 20, 2018, 4:58 p.m. UTC | #1
On Tue, Nov 20, 2018 at 03:04:39PM +0000, Ævar Arnfjörð Bjarmason wrote:
> Add more progress output to the output already added in
> 7b0f229222 ("commit-graph write: add progress output", 2018-09-17).
> 
> As noted in that commit most of the progress output isn't displayed on
> small repositories, but before this change we'd noticeably hang for
> 2-3 seconds at the end on medium sized repositories such as linux.git.
> 
> Now we'll instead show output like this, and have no human-observable
> point at which we're not producing progress output:
> 
>     $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
>     Finding commits for commit graph: 6418991, done.
>     Computing commit graph generation numbers: 100% (797205/797205), done.
>     Writing out commit graph chunks: 2399861, done.
> 
> This "graph chunks" number is not meant to be meaningful to the user,
> but just to show that we're doing work and the command isn't
> hanging.
> 
> On a much larger in-house repository I have we'll show (note how we
> also say "Annotating[...]"):
> 
>     $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
>     Finding commits for commit graph: 48271163, done.
>     Annotating commit graph: 21424536, done.
>     Computing commit graph generation numbers: 100% (7141512/7141512), done.
>     Writing out commit graph chunks: 21424913, done.

That's a lot of chunks, but according to the specs, there are only 3
or 4 chunks in a commit-graph file.  More on this below.

> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>  commit-graph.c | 47 ++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/commit-graph.c b/commit-graph.c
> index e6d0d7722b..afce20dd4d 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -433,7 +433,9 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
>  
>  static void write_graph_chunk_fanout(struct hashfile *f,
>  				     struct commit **commits,
> -				     int nr_commits)
> +				     int nr_commits,
> +				     struct progress *progress,
> +				     uint64_t *progress_cnt)
>  {
>  	int i, count = 0;
>  	struct commit **list = commits;
> @@ -445,6 +447,8 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>  	 */
>  	for (i = 0; i < 256; i++) {
>  		while (count < nr_commits) {
> +			if (progress)
> +				display_progress(progress, ++*progress_cnt);

The condition is unnecessary, display_progress() is prepared to deal
with a NULL progress pointer.  The same applies to all such calls in
this patch.

>  			if ((*list)->object.oid.hash[0] != i)
>  				break;
>  			count++;
> @@ -456,12 +460,17 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>  }
>  
>  static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
> -				   struct commit **commits, int nr_commits)
> +				   struct commit **commits, int nr_commits,
> +				   struct progress *progress,
> +				   uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	int count;
> -	for (count = 0; count < nr_commits; count++, list++)
> +	for (count = 0; count < nr_commits; count++, list++) {
> +		if (progress)
> +			display_progress(progress, ++*progress_cnt);
>  		hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
> +	}
>  }
>  
>  static const unsigned char *commit_to_sha1(size_t index, void *table)
> @@ -471,7 +480,9 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
>  }
>  
>  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
> -				   struct commit **commits, int nr_commits)
> +				   struct commit **commits, int nr_commits,
> +				   struct progress *progress,
> +				   uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	struct commit **last = commits + nr_commits;
> @@ -482,6 +493,9 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
>  		int edge_value;
>  		uint32_t packedDate[2];
>  
> +		if (progress)
> +			display_progress(progress, ++*progress_cnt);
> +
>  		parse_commit(*list);
>  		hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
>  
> @@ -542,7 +556,9 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
>  
>  static void write_graph_chunk_large_edges(struct hashfile *f,
>  					  struct commit **commits,
> -					  int nr_commits)
> +					  int nr_commits,
> +					  struct progress *progress,
> +					  uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	struct commit **last = commits + nr_commits;
> @@ -566,6 +582,9 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
>  						  nr_commits,
>  						  commit_to_sha1);
>  
> +			if (progress)
> +				display_progress(progress, ++*progress_cnt);
> +
>  			if (edge_value < 0)
>  				edge_value = GRAPH_PARENT_MISSING;
>  			else if (!parent->next)
> @@ -764,6 +783,7 @@ void write_commit_graph(const char *obj_dir,
>  	int num_extra_edges;
>  	struct commit_list *parent;
>  	struct progress *progress = NULL;
> +	uint64_t progress_cnt;
>  
>  	if (!commit_graph_compatible(the_repository))
>  		return;
> @@ -937,10 +957,19 @@ void write_commit_graph(const char *obj_dir,
>  		hashwrite(f, chunk_write, 12);
>  	}
>  
> -	write_graph_chunk_fanout(f, commits.list, commits.nr);
> -	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
> -	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
> -	write_graph_chunk_large_edges(f, commits.list, commits.nr);
> +	if (report_progress)
> +		progress = start_delayed_progress(
> +			_("Writing out commit graph chunks"),
> +			progress_cnt = 0);

First, this is an unusual place to set a variable.

Second, as mentioned above, there are only 3 or 4 chunks, therefore I
think this should only say "Writing out commit graph".

Finally, each of the write_graph_chunk_*() functions called below
iterate over all commits, so we know and thus can show the total in
advance.

So how about something like the patch below on top?  Note that I had
to shift two display_progress() calls a couple of lines, because
otherwise the numbers didn't add up.

Just to get you thinking and to have something to try out, but I saw a
bit of weirdness while at it, and want to look into it, but now I've
got to go...


> +	write_graph_chunk_fanout(f, commits.list, commits.nr, progress,
> +				 &progress_cnt);
> +	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr,
> +			       progress, &progress_cnt);
> +	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr,
> +			       progress, &progress_cnt);
> +	write_graph_chunk_large_edges(f, commits.list, commits.nr, progress,
> +				      &progress_cnt);
> +	stop_progress(&progress);
>  
>  	close_commit_graph(the_repository);
>  	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);



diff --git a/commit-graph.c b/commit-graph.c
index 2e2eaa24ca..2f3417db32 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -447,10 +447,9 @@ static void write_graph_chunk_fanout(struct hashfile *f,
 	 */
 	for (i = 0; i < 256; i++) {
 		while (count < nr_commits) {
-			if (progress)
-				display_progress(progress, ++*progress_cnt);
 			if ((*list)->object.oid.hash[0] != i)
 				break;
+			display_progress(progress, ++*progress_cnt);
 			count++;
 			list++;
 		}
@@ -467,8 +466,7 @@ static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
 	struct commit **list = commits;
 	int count;
 	for (count = 0; count < nr_commits; count++, list++) {
-		if (progress)
-			display_progress(progress, ++*progress_cnt);
+		display_progress(progress, ++*progress_cnt);
 		hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
 	}
 }
@@ -493,8 +491,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 		int edge_value;
 		uint32_t packedDate[2];
 
-		if (progress)
-			display_progress(progress, ++*progress_cnt);
+		display_progress(progress, ++*progress_cnt);
 
 		parse_commit(*list);
 		hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
@@ -570,6 +567,8 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
 		     parent = parent->next)
 			num_parents++;
 
+		display_progress(progress, ++*progress_cnt);
+
 		if (num_parents <= 2) {
 			list++;
 			continue;
@@ -582,9 +581,6 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
 						  nr_commits,
 						  commit_to_sha1);
 
-			if (progress)
-				display_progress(progress, ++*progress_cnt);
-
 			if (edge_value < 0)
 				edge_value = GRAPH_PARENT_MISSING;
 			else if (!parent->next)
@@ -986,10 +982,11 @@ void write_commit_graph(const char *obj_dir,
 		hashwrite(f, chunk_write, 12);
 	}
 
-	if (report_progress)
+	if (report_progress) {
 		progress = start_delayed_progress(
-			_("Writing out commit graph chunks"),
-			progress_cnt = 0);
+			_("Writing out commit graph"), 4 * commits.nr);
+		progress_cnt = 0;
+	}
 	write_graph_chunk_fanout(f, commits.list, commits.nr, progress,
 				 &progress_cnt);
 	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr,
SZEDER Gábor Nov. 21, 2018, 1:23 a.m. UTC | #2
On Tue, Nov 20, 2018 at 05:58:00PM +0100, SZEDER Gábor wrote:
> I saw a
> bit of weirdness while at it, and want to look into it, but now I've
> got to go...

So here are two simple patches that address the "Huh?!" moments I had
while looking at the progress output during writing the commit graph
file.  The first is a small cleanup to avoid confusion, but see the
notes attaches, while the second is a bit of an optimization.

SZEDER Gábor (2):
  commit-graph: rename 'num_extra_edges' variable to 'num_large_edges'
  commit-graph: don't call write_graph_chunk_large_edges() unnecessarily

 commit-graph.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

Patch
diff mbox series

diff --git a/commit-graph.c b/commit-graph.c
index e6d0d7722b..afce20dd4d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -433,7 +433,9 @@  struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
 
 static void write_graph_chunk_fanout(struct hashfile *f,
 				     struct commit **commits,
-				     int nr_commits)
+				     int nr_commits,
+				     struct progress *progress,
+				     uint64_t *progress_cnt)
 {
 	int i, count = 0;
 	struct commit **list = commits;
@@ -445,6 +447,8 @@  static void write_graph_chunk_fanout(struct hashfile *f,
 	 */
 	for (i = 0; i < 256; i++) {
 		while (count < nr_commits) {
+			if (progress)
+				display_progress(progress, ++*progress_cnt);
 			if ((*list)->object.oid.hash[0] != i)
 				break;
 			count++;
@@ -456,12 +460,17 @@  static void write_graph_chunk_fanout(struct hashfile *f,
 }
 
 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
-				   struct commit **commits, int nr_commits)
+				   struct commit **commits, int nr_commits,
+				   struct progress *progress,
+				   uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	int count;
-	for (count = 0; count < nr_commits; count++, list++)
+	for (count = 0; count < nr_commits; count++, list++) {
+		if (progress)
+			display_progress(progress, ++*progress_cnt);
 		hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
+	}
 }
 
 static const unsigned char *commit_to_sha1(size_t index, void *table)
@@ -471,7 +480,9 @@  static const unsigned char *commit_to_sha1(size_t index, void *table)
 }
 
 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
-				   struct commit **commits, int nr_commits)
+				   struct commit **commits, int nr_commits,
+				   struct progress *progress,
+				   uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	struct commit **last = commits + nr_commits;
@@ -482,6 +493,9 @@  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 		int edge_value;
 		uint32_t packedDate[2];
 
+		if (progress)
+			display_progress(progress, ++*progress_cnt);
+
 		parse_commit(*list);
 		hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
 
@@ -542,7 +556,9 @@  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 
 static void write_graph_chunk_large_edges(struct hashfile *f,
 					  struct commit **commits,
-					  int nr_commits)
+					  int nr_commits,
+					  struct progress *progress,
+					  uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	struct commit **last = commits + nr_commits;
@@ -566,6 +582,9 @@  static void write_graph_chunk_large_edges(struct hashfile *f,
 						  nr_commits,
 						  commit_to_sha1);
 
+			if (progress)
+				display_progress(progress, ++*progress_cnt);
+
 			if (edge_value < 0)
 				edge_value = GRAPH_PARENT_MISSING;
 			else if (!parent->next)
@@ -764,6 +783,7 @@  void write_commit_graph(const char *obj_dir,
 	int num_extra_edges;
 	struct commit_list *parent;
 	struct progress *progress = NULL;
+	uint64_t progress_cnt;
 
 	if (!commit_graph_compatible(the_repository))
 		return;
@@ -937,10 +957,19 @@  void write_commit_graph(const char *obj_dir,
 		hashwrite(f, chunk_write, 12);
 	}
 
-	write_graph_chunk_fanout(f, commits.list, commits.nr);
-	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
-	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
-	write_graph_chunk_large_edges(f, commits.list, commits.nr);
+	if (report_progress)
+		progress = start_delayed_progress(
+			_("Writing out commit graph chunks"),
+			progress_cnt = 0);
+	write_graph_chunk_fanout(f, commits.list, commits.nr, progress,
+				 &progress_cnt);
+	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr,
+			       progress, &progress_cnt);
+	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr,
+			       progress, &progress_cnt);
+	write_graph_chunk_large_edges(f, commits.list, commits.nr, progress,
+				      &progress_cnt);
+	stop_progress(&progress);
 
 	close_commit_graph(the_repository);
 	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);