diff mbox series

[v2,2/6] commit-graph write: add more progress output

Message ID 20181120195027.3260-3-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/6] commit-graph write: rephrase confusing progress output | expand

Commit Message

Ævar Arnfjörð Bjarmason Nov. 20, 2018, 7:50 p.m. UTC
Add more progress output to the output already added in
7b0f229222 ("commit-graph write: add progress output", 2018-09-17).

As noted in that commit most of the progress output isn't displayed on
small repositories, but before this change we'd noticeably hang for
2-3 seconds at the end on medium sized repositories such as linux.git.

Now we'll instead show output like this, and have no human-observable
point at which we're not producing progress output:

    $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
    Finding commits for commit graph: 6365492, done.
    Computing commit graph generation numbers: 100% (797222/797222), done.
    Writing out commit graph: 2399912, done.

This "writing out" number is not meant to be meaningful to the user,
but just to show that we're doing work and the command isn't
hanging.

In the current implementation it's approximately 4x the number of
commits. As noted in on-list discussion[1] we could add the loops up
and show percentage progress here, but I don't think it's worth it. It
would make the implementation more complex and harder to maintain for
very little gain.

On a much larger in-house repository I have we'll show (note how we
also say "Annotating[...]"):

    $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
    Finding commits for commit graph: 50026015, done.
    Annotating commit graph: 21567407, done.
    Computing commit graph generation numbers: 100% (7144680/7144680), done.
    Writing out commit graph: 21434417, done.

1. https://public-inbox.org/git/20181120165800.GB30222@szeder.dev/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 commit-graph.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

Comments

SZEDER Gábor Nov. 20, 2018, 11:38 p.m. UTC | #1
On Tue, Nov 20, 2018 at 07:50:23PM +0000, Ævar Arnfjörð Bjarmason wrote:
> Add more progress output to the output already added in
> 7b0f229222 ("commit-graph write: add progress output", 2018-09-17).
> 
> As noted in that commit most of the progress output isn't displayed on
> small repositories, but before this change we'd noticeably hang for
> 2-3 seconds at the end on medium sized repositories such as linux.git.
> 
> Now we'll instead show output like this, and have no human-observable
> point at which we're not producing progress output:
> 
>     $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
>     Finding commits for commit graph: 6365492, done.
>     Computing commit graph generation numbers: 100% (797222/797222), done.
>     Writing out commit graph: 2399912, done.
> 
> This "writing out" number is not meant to be meaningful to the user,
> but just to show that we're doing work and the command isn't
> hanging.
> 
> In the current implementation it's approximately 4x the number of
> commits.

"approximately" only, because the current implementation is buggy :)
If done right it's exactly 4x the number of commits.

> As noted in on-list discussion[1] we could add the loops up
> and show percentage progress here, but I don't think it's worth it. It
> would make the implementation more complex and harder to maintain for
> very little gain.

I think that if we can cheaply and accurately figure out the total,
then we should display it, so onlooking users can guesstimate how much
work is still left to be done.

> On a much larger in-house repository I have we'll show (note how we
> also say "Annotating[...]"):
> 
>     $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
>     Finding commits for commit graph: 50026015, done.
>     Annotating commit graph: 21567407, done.
>     Computing commit graph generation numbers: 100% (7144680/7144680), done.
>     Writing out commit graph: 21434417, done.
> 
> 1. https://public-inbox.org/git/20181120165800.GB30222@szeder.dev/
> 
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>  commit-graph.c | 41 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 32 insertions(+), 9 deletions(-)
> 
> diff --git a/commit-graph.c b/commit-graph.c
> index e6d0d7722b..6f6409b292 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -433,7 +433,9 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
>  
>  static void write_graph_chunk_fanout(struct hashfile *f,
>  				     struct commit **commits,
> -				     int nr_commits)
> +				     int nr_commits,
> +				     struct progress *progress,
> +				     uint64_t *progress_cnt)
>  {
>  	int i, count = 0;
>  	struct commit **list = commits;
> @@ -445,6 +447,7 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>  	 */
>  	for (i = 0; i < 256; i++) {
>  		while (count < nr_commits) {
> +			display_progress(progress, ++*progress_cnt);

I think this display_progress() should be places after the condition,
so no one has to waste brain cycles on figuring out, why it always
counts 255 more than the number of commits.

>  			if ((*list)->object.oid.hash[0] != i)
>  				break;
>  			count++;
> @@ -456,12 +459,16 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>  }
>  
>  static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
> -				   struct commit **commits, int nr_commits)
> +				   struct commit **commits, int nr_commits,
> +				   struct progress *progress,
> +				   uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	int count;
> -	for (count = 0; count < nr_commits; count++, list++)
> +	for (count = 0; count < nr_commits; count++, list++) {
> +		display_progress(progress, ++*progress_cnt);
>  		hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
> +	}
>  }
>  
>  static const unsigned char *commit_to_sha1(size_t index, void *table)
> @@ -471,7 +478,9 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
>  }
>  
>  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
> -				   struct commit **commits, int nr_commits)
> +				   struct commit **commits, int nr_commits,
> +				   struct progress *progress,
> +				   uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	struct commit **last = commits + nr_commits;
> @@ -481,6 +490,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
>  		struct commit_list *parent;
>  		int edge_value;
>  		uint32_t packedDate[2];
> +		display_progress(progress, ++*progress_cnt);
>  
>  		parse_commit(*list);
>  		hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
> @@ -542,7 +552,9 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
>  
>  static void write_graph_chunk_large_edges(struct hashfile *f,
>  					  struct commit **commits,
> -					  int nr_commits)
> +					  int nr_commits,
> +					  struct progress *progress,
> +					  uint64_t *progress_cnt)
>  {
>  	struct commit **list = commits;
>  	struct commit **last = commits + nr_commits;

> @@ -565,6 +577,7 @@ static void write_graph_chunk_large_edges(struct hashfile *f,

[Adding more before-context to this hunk here...]

>	while (list < last) {

This loop iterates over all commits ...

>		int num_parents = 0;
>		for (parent = (*list)->parents; num_parents < 3 && parent;
>		     parent = parent->next)
>			num_parents++;

... counts the parents of the current commit ...

>
>		if (num_parents <= 2) {
>			list++;
>			continue;

... and continues iterating unless it's an octopus merge.

>		}
>
>		/* Since num_parents > 2, this initializer is safe. */
>		for (parent = (*list)->parents->next; parent; parent = parent->next) {
>			int edge_value = sha1_pos(parent->item->object.oid.hash,
>  						  commits,
>  						  nr_commits,
>  						  commit_to_sha1);
> +			display_progress(progress, ++*progress_cnt);

So this display_progress() call is in the wrong place, because it will
only be invoked on octopus merges, which only rarely occur in
practice, thus it's entirely possible that it won't show any progress
at all while the outer while loop iterates over the whole history.

This display_progress() should be places somewhere before that 'if
(num_parents <= 2)' condition.  And then this one, too, will count the
number of commits.

>  
>  			if (edge_value < 0)
>  				edge_value = GRAPH_PARENT_MISSING;
> @@ -764,6 +777,7 @@ void write_commit_graph(const char *obj_dir,
>  	int num_extra_edges;
>  	struct commit_list *parent;
>  	struct progress *progress = NULL;
> +	uint64_t progress_cnt = 0;
>  
>  	if (!commit_graph_compatible(the_repository))
>  		return;
> @@ -937,10 +951,19 @@ void write_commit_graph(const char *obj_dir,
>  		hashwrite(f, chunk_write, 12);
>  	}
>  
> -	write_graph_chunk_fanout(f, commits.list, commits.nr);
> -	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
> -	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
> -	write_graph_chunk_large_edges(f, commits.list, commits.nr);
> +	if (report_progress)
> +		progress = start_delayed_progress(
> +			_("Writing out commit graph"),
> +			0);
> +	write_graph_chunk_fanout(f, commits.list, commits.nr, progress,
> +				 &progress_cnt);
> +	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr,
> +			       progress, &progress_cnt);
> +	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr,
> +			       progress, &progress_cnt);
> +	write_graph_chunk_large_edges(f, commits.list, commits.nr, progress,
> +				      &progress_cnt);
> +	stop_progress(&progress);
>  
>  	close_commit_graph(the_repository);
>  	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
> -- 
> 2.20.0.rc0.387.gc7a69e6b6c
>
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index e6d0d7722b..6f6409b292 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -433,7 +433,9 @@  struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
 
 static void write_graph_chunk_fanout(struct hashfile *f,
 				     struct commit **commits,
-				     int nr_commits)
+				     int nr_commits,
+				     struct progress *progress,
+				     uint64_t *progress_cnt)
 {
 	int i, count = 0;
 	struct commit **list = commits;
@@ -445,6 +447,7 @@  static void write_graph_chunk_fanout(struct hashfile *f,
 	 */
 	for (i = 0; i < 256; i++) {
 		while (count < nr_commits) {
+			display_progress(progress, ++*progress_cnt);
 			if ((*list)->object.oid.hash[0] != i)
 				break;
 			count++;
@@ -456,12 +459,16 @@  static void write_graph_chunk_fanout(struct hashfile *f,
 }
 
 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
-				   struct commit **commits, int nr_commits)
+				   struct commit **commits, int nr_commits,
+				   struct progress *progress,
+				   uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	int count;
-	for (count = 0; count < nr_commits; count++, list++)
+	for (count = 0; count < nr_commits; count++, list++) {
+		display_progress(progress, ++*progress_cnt);
 		hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
+	}
 }
 
 static const unsigned char *commit_to_sha1(size_t index, void *table)
@@ -471,7 +478,9 @@  static const unsigned char *commit_to_sha1(size_t index, void *table)
 }
 
 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
-				   struct commit **commits, int nr_commits)
+				   struct commit **commits, int nr_commits,
+				   struct progress *progress,
+				   uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	struct commit **last = commits + nr_commits;
@@ -481,6 +490,7 @@  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 		struct commit_list *parent;
 		int edge_value;
 		uint32_t packedDate[2];
+		display_progress(progress, ++*progress_cnt);
 
 		parse_commit(*list);
 		hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
@@ -542,7 +552,9 @@  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 
 static void write_graph_chunk_large_edges(struct hashfile *f,
 					  struct commit **commits,
-					  int nr_commits)
+					  int nr_commits,
+					  struct progress *progress,
+					  uint64_t *progress_cnt)
 {
 	struct commit **list = commits;
 	struct commit **last = commits + nr_commits;
@@ -565,6 +577,7 @@  static void write_graph_chunk_large_edges(struct hashfile *f,
 						  commits,
 						  nr_commits,
 						  commit_to_sha1);
+			display_progress(progress, ++*progress_cnt);
 
 			if (edge_value < 0)
 				edge_value = GRAPH_PARENT_MISSING;
@@ -764,6 +777,7 @@  void write_commit_graph(const char *obj_dir,
 	int num_extra_edges;
 	struct commit_list *parent;
 	struct progress *progress = NULL;
+	uint64_t progress_cnt = 0;
 
 	if (!commit_graph_compatible(the_repository))
 		return;
@@ -937,10 +951,19 @@  void write_commit_graph(const char *obj_dir,
 		hashwrite(f, chunk_write, 12);
 	}
 
-	write_graph_chunk_fanout(f, commits.list, commits.nr);
-	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
-	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
-	write_graph_chunk_large_edges(f, commits.list, commits.nr);
+	if (report_progress)
+		progress = start_delayed_progress(
+			_("Writing out commit graph"),
+			0);
+	write_graph_chunk_fanout(f, commits.list, commits.nr, progress,
+				 &progress_cnt);
+	write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr,
+			       progress, &progress_cnt);
+	write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr,
+			       progress, &progress_cnt);
+	write_graph_chunk_large_edges(f, commits.list, commits.nr, progress,
+				      &progress_cnt);
+	stop_progress(&progress);
 
 	close_commit_graph(the_repository);
 	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);