mbox series

[v2,0/3] Fix built-in rebase perf regression

Message ID pull.72.v2.git.gitgitgadget@gmail.com (mailing list archive)
Headers show
Series Fix built-in rebase perf regression | expand

Message

Derrick Stolee via GitGitGadget Nov. 12, 2018, 11:44 a.m. UTC
In our tests with large repositories, we noticed a serious regression of the
performance of git rebase when using the built-in vs the shell script
version. It boils down to an incorrect conversion of a git checkout -q:
instead of using a twoway_merge as git checkout does, we used a oneway_merge 
as git reset does. The latter, however, calls lstat() on all files listed in
the index, while the former essentially looks only at the files that are
different between the given two revisions.

Let's reinstate the original behavior by introducing a flag to the 
reset_head() function to indicate whether we want to emulate reset --hard 
(in which case we use the oneway_merge, otherwise we use twoway_merge).

Johannes Schindelin (3):
  rebase: consolidate clean-up code before leaving reset_head()
  rebase: prepare reset_head() for more flags
  built-in rebase: reinstate `checkout -q` behavior where appropriate

 builtin/rebase.c | 79 ++++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 33 deletions(-)


base-commit: 8858448bb49332d353febc078ce4a3abcc962efe
Published-As: https://github.com/gitgitgadget/git/releases/tags/pr-72%2Fdscho%2Fbuiltin-rebase-perf-regression-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-72/dscho/builtin-rebase-perf-regression-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/72

Range-diff vs v1:

 1:  64597fe827 ! 1:  28e24d98ab rebase: consolidate clean-up code before leaving reset_head()
     @@ -11,6 +11,33 @@
      --- a/builtin/rebase.c
      +++ b/builtin/rebase.c
      @@
     + 	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
     + 		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
     + 
     +-	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
     +-		return -1;
     ++	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0) {
     ++		ret = -1;
     ++		goto leave_reset_head;
     ++	}
     + 
     + 	if (!oid) {
     + 		if (get_oid("HEAD", &head_oid)) {
     +-			rollback_lock_file(&lock);
     +-			return error(_("could not determine HEAD revision"));
     ++			ret = error(_("could not determine HEAD revision"));
     ++			goto leave_reset_head;
     + 		}
     + 		oid = &head_oid;
     + 	}
     +@@
     + 		unpack_tree_opts.reset = 1;
     + 
     + 	if (read_index_unmerged(the_repository->index) < 0) {
     +-		rollback_lock_file(&lock);
     +-		return error(_("could not read index"));
     ++		ret = error(_("could not read index"));
     ++		goto leave_reset_head;
       	}
       
       	if (!fill_tree_descriptor(&desc, oid)) {
     @@ -31,15 +58,17 @@
       	}
       
       	tree = parse_tree_indirect(oid);
     -@@
     + 	prime_cache_tree(the_repository->index, tree);
       
     - 	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
     +-	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
     ++	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0) {
       		ret = error(_("could not write index"));
      -	free((void *)desc.buffer);
     - 
     - 	if (ret)
     +-
     +-	if (ret)
      -		return ret;
      +		goto leave_reset_head;
     ++	}
       
       	reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT);
       	strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : "rebase");
 -:  ---------- > 2:  db963b2094 rebase: prepare reset_head() for more flags
 2:  070092b430 ! 3:  a7360b856f built-in rebase: reinstate `checkout -q` behavior where appropriate
     @@ -20,15 +20,18 @@
      @@
       #define GIT_REFLOG_ACTION_ENVIRONMENT "GIT_REFLOG_ACTION"
       
     + #define RESET_HEAD_DETACH (1<<0)
     ++#define RESET_HEAD_HARD (1<<1)
     + 
       static int reset_head(struct object_id *oid, const char *action,
     --		      const char *switch_to_branch, int detach_head,
     -+		      const char *switch_to_branch,
     -+		      int detach_head, int reset_hard,
     + 		      const char *switch_to_branch, unsigned flags,
       		      const char *reflog_orig_head, const char *reflog_head)
       {
     + 	unsigned detach_head = flags & RESET_HEAD_DETACH;
     ++	unsigned reset_hard = flags & RESET_HEAD_HARD;
       	struct object_id head_oid;
      -	struct tree_desc desc;
     -+	struct tree_desc desc[2];
     ++	struct tree_desc desc[2] = { { NULL }, { NULL } };
       	struct lock_file lock = LOCK_INIT;
       	struct unpack_trees_options unpack_tree_opts;
       	struct tree *tree;
     @@ -42,18 +45,18 @@
       	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
       		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
      @@
     - 	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
     - 		return -1;
     + 		goto leave_reset_head;
     + 	}
       
      -	if (!oid) {
      -		if (get_oid("HEAD", &head_oid)) {
     --			rollback_lock_file(&lock);
     --			return error(_("could not determine HEAD revision"));
     +-			ret = error(_("could not determine HEAD revision"));
     +-			goto leave_reset_head;
      -		}
      -		oid = &head_oid;
     -+	if (get_oid("HEAD", &head_oid)) {
     -+		rollback_lock_file(&lock);
     -+		return error(_("could not determine HEAD revision"));
     ++	if ((!oid || !reset_hard) && get_oid("HEAD", &head_oid)) {
     ++		ret = error(_("could not determine HEAD revision"));
     ++		goto leave_reset_head;
       	}
       
      +	if (!oid)
     @@ -70,7 +73,7 @@
       	unpack_tree_opts.merge = 1;
       	if (!detach_head)
      @@
     - 		return error(_("could not read index"));
     + 		goto leave_reset_head;
       	}
       
      -	if (!fill_tree_descriptor(&desc, oid)) {
     @@ -104,7 +107,8 @@
       		string_list_clear(&merge_rr, 1);
       
      -		if (reset_head(NULL, "reset", NULL, 0, NULL, NULL) < 0)
     -+		if (reset_head(NULL, "reset", NULL, 0, 1, NULL, NULL) < 0)
     ++		if (reset_head(NULL, "reset", NULL, RESET_HEAD_HARD,
     ++			       NULL, NULL) < 0)
       			die(_("could not discard worktree changes"));
       		if (read_basic_state(&options))
       			exit(1);
     @@ -113,7 +117,8 @@
       			exit(1);
       		if (reset_head(&options.orig_head, "reset",
      -			       options.head_name, 0, NULL, NULL) < 0)
     -+			       options.head_name, 0, 1, NULL, NULL) < 0)
     ++			       options.head_name, RESET_HEAD_HARD,
     ++			       NULL, NULL) < 0)
       			die(_("could not move back to %s"),
       			    oid_to_hex(&options.orig_head));
       		ret = finish_rebase(&options);
     @@ -122,34 +127,7 @@
       			printf(_("Created autostash: %s\n"), buf.buf);
       			if (reset_head(&head->object.oid, "reset --hard",
      -				       NULL, 0, NULL, NULL) < 0)
     -+				       NULL, 0, 1, NULL, NULL) < 0)
     ++				       NULL, RESET_HEAD_HARD, NULL, NULL) < 0)
       				die(_("could not reset --hard"));
       			printf(_("HEAD is now at %s"),
       			       find_unique_abbrev(&head->object.oid,
     -@@
     - 				strbuf_addf(&buf, "rebase: checkout %s",
     - 					    options.switch_to);
     - 				if (reset_head(&oid, "checkout",
     --					       options.head_name, 0,
     -+					       options.head_name, 0, 0,
     - 					       NULL, NULL) < 0) {
     - 					ret = !!error(_("could not switch to "
     - 							"%s"),
     -@@
     - 			 "it...\n"));
     - 
     - 	strbuf_addf(&msg, "rebase: checkout %s", options.onto_name);
     --	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1,
     -+	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1, 0,
     - 	    NULL, msg.buf))
     - 		die(_("Could not detach HEAD"));
     - 	strbuf_release(&msg);
     -@@
     - 		strbuf_addf(&msg, "rebase finished: %s onto %s",
     - 			options.head_name ? options.head_name : "detached HEAD",
     - 			oid_to_hex(&options.onto->object.oid));
     --		reset_head(NULL, "Fast-forwarded", options.head_name, 0,
     -+		reset_head(NULL, "Fast-forwarded", options.head_name, 0, 0,
     - 			   "HEAD", msg.buf);
     - 		strbuf_release(&msg);
     - 		ret = !!finish_rebase(&options);

Comments

Johannes Schindelin Nov. 12, 2018, 2:26 p.m. UTC | #1
Hi,

On Mon, 12 Nov 2018, Johannes Schindelin via GitGitGadget wrote:

> In our tests with large repositories, we noticed a serious regression of the
> performance of git rebase when using the built-in vs the shell script
> version. It boils down to an incorrect conversion of a git checkout -q:
> instead of using a twoway_merge as git checkout does, we used a oneway_merge 
> as git reset does. The latter, however, calls lstat() on all files listed in
> the index, while the former essentially looks only at the files that are
> different between the given two revisions.
> 
> Let's reinstate the original behavior by introducing a flag to the 
> reset_head() function to indicate whether we want to emulate reset --hard 
> (in which case we use the oneway_merge, otherwise we use twoway_merge).
> 
> Johannes Schindelin (3):
>   rebase: consolidate clean-up code before leaving reset_head()
>   rebase: prepare reset_head() for more flags
>   built-in rebase: reinstate `checkout -q` behavior where appropriate
> 
>  builtin/rebase.c | 79 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 46 insertions(+), 33 deletions(-)

I forgot to specify the changes vs v1:

- More error paths are not consolidated via `goto leave_reset_head`.
- The `desc` array is not initialized to all-zero, to avoid bogus
  addresses being passed to `free()`.
- The `detach_head` and `reset_hard` parameters have been consolidated
  into a `flags` parameter.
- The `reset_head()` function once again only initializes `head_oid`
  when needed.

Sorry for the omission,
Johannes

> 
> 
> base-commit: 8858448bb49332d353febc078ce4a3abcc962efe
> Published-As: https://github.com/gitgitgadget/git/releases/tags/pr-72%2Fdscho%2Fbuiltin-rebase-perf-regression-v2
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-72/dscho/builtin-rebase-perf-regression-v2
> Pull-Request: https://github.com/gitgitgadget/git/pull/72
> 
> Range-diff vs v1:
> 
>  1:  64597fe827 ! 1:  28e24d98ab rebase: consolidate clean-up code before leaving reset_head()
>      @@ -11,6 +11,33 @@
>       --- a/builtin/rebase.c
>       +++ b/builtin/rebase.c
>       @@
>      + 	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
>      + 		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
>      + 
>      +-	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
>      +-		return -1;
>      ++	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0) {
>      ++		ret = -1;
>      ++		goto leave_reset_head;
>      ++	}
>      + 
>      + 	if (!oid) {
>      + 		if (get_oid("HEAD", &head_oid)) {
>      +-			rollback_lock_file(&lock);
>      +-			return error(_("could not determine HEAD revision"));
>      ++			ret = error(_("could not determine HEAD revision"));
>      ++			goto leave_reset_head;
>      + 		}
>      + 		oid = &head_oid;
>      + 	}
>      +@@
>      + 		unpack_tree_opts.reset = 1;
>      + 
>      + 	if (read_index_unmerged(the_repository->index) < 0) {
>      +-		rollback_lock_file(&lock);
>      +-		return error(_("could not read index"));
>      ++		ret = error(_("could not read index"));
>      ++		goto leave_reset_head;
>        	}
>        
>        	if (!fill_tree_descriptor(&desc, oid)) {
>      @@ -31,15 +58,17 @@
>        	}
>        
>        	tree = parse_tree_indirect(oid);
>      -@@
>      + 	prime_cache_tree(the_repository->index, tree);
>        
>      - 	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
>      +-	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
>      ++	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0) {
>        		ret = error(_("could not write index"));
>       -	free((void *)desc.buffer);
>      - 
>      - 	if (ret)
>      +-
>      +-	if (ret)
>       -		return ret;
>       +		goto leave_reset_head;
>      ++	}
>        
>        	reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT);
>        	strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : "rebase");
>  -:  ---------- > 2:  db963b2094 rebase: prepare reset_head() for more flags
>  2:  070092b430 ! 3:  a7360b856f built-in rebase: reinstate `checkout -q` behavior where appropriate
>      @@ -20,15 +20,18 @@
>       @@
>        #define GIT_REFLOG_ACTION_ENVIRONMENT "GIT_REFLOG_ACTION"
>        
>      + #define RESET_HEAD_DETACH (1<<0)
>      ++#define RESET_HEAD_HARD (1<<1)
>      + 
>        static int reset_head(struct object_id *oid, const char *action,
>      --		      const char *switch_to_branch, int detach_head,
>      -+		      const char *switch_to_branch,
>      -+		      int detach_head, int reset_hard,
>      + 		      const char *switch_to_branch, unsigned flags,
>        		      const char *reflog_orig_head, const char *reflog_head)
>        {
>      + 	unsigned detach_head = flags & RESET_HEAD_DETACH;
>      ++	unsigned reset_hard = flags & RESET_HEAD_HARD;
>        	struct object_id head_oid;
>       -	struct tree_desc desc;
>      -+	struct tree_desc desc[2];
>      ++	struct tree_desc desc[2] = { { NULL }, { NULL } };
>        	struct lock_file lock = LOCK_INIT;
>        	struct unpack_trees_options unpack_tree_opts;
>        	struct tree *tree;
>      @@ -42,18 +45,18 @@
>        	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
>        		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
>       @@
>      - 	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
>      - 		return -1;
>      + 		goto leave_reset_head;
>      + 	}
>        
>       -	if (!oid) {
>       -		if (get_oid("HEAD", &head_oid)) {
>      --			rollback_lock_file(&lock);
>      --			return error(_("could not determine HEAD revision"));
>      +-			ret = error(_("could not determine HEAD revision"));
>      +-			goto leave_reset_head;
>       -		}
>       -		oid = &head_oid;
>      -+	if (get_oid("HEAD", &head_oid)) {
>      -+		rollback_lock_file(&lock);
>      -+		return error(_("could not determine HEAD revision"));
>      ++	if ((!oid || !reset_hard) && get_oid("HEAD", &head_oid)) {
>      ++		ret = error(_("could not determine HEAD revision"));
>      ++		goto leave_reset_head;
>        	}
>        
>       +	if (!oid)
>      @@ -70,7 +73,7 @@
>        	unpack_tree_opts.merge = 1;
>        	if (!detach_head)
>       @@
>      - 		return error(_("could not read index"));
>      + 		goto leave_reset_head;
>        	}
>        
>       -	if (!fill_tree_descriptor(&desc, oid)) {
>      @@ -104,7 +107,8 @@
>        		string_list_clear(&merge_rr, 1);
>        
>       -		if (reset_head(NULL, "reset", NULL, 0, NULL, NULL) < 0)
>      -+		if (reset_head(NULL, "reset", NULL, 0, 1, NULL, NULL) < 0)
>      ++		if (reset_head(NULL, "reset", NULL, RESET_HEAD_HARD,
>      ++			       NULL, NULL) < 0)
>        			die(_("could not discard worktree changes"));
>        		if (read_basic_state(&options))
>        			exit(1);
>      @@ -113,7 +117,8 @@
>        			exit(1);
>        		if (reset_head(&options.orig_head, "reset",
>       -			       options.head_name, 0, NULL, NULL) < 0)
>      -+			       options.head_name, 0, 1, NULL, NULL) < 0)
>      ++			       options.head_name, RESET_HEAD_HARD,
>      ++			       NULL, NULL) < 0)
>        			die(_("could not move back to %s"),
>        			    oid_to_hex(&options.orig_head));
>        		ret = finish_rebase(&options);
>      @@ -122,34 +127,7 @@
>        			printf(_("Created autostash: %s\n"), buf.buf);
>        			if (reset_head(&head->object.oid, "reset --hard",
>       -				       NULL, 0, NULL, NULL) < 0)
>      -+				       NULL, 0, 1, NULL, NULL) < 0)
>      ++				       NULL, RESET_HEAD_HARD, NULL, NULL) < 0)
>        				die(_("could not reset --hard"));
>        			printf(_("HEAD is now at %s"),
>        			       find_unique_abbrev(&head->object.oid,
>      -@@
>      - 				strbuf_addf(&buf, "rebase: checkout %s",
>      - 					    options.switch_to);
>      - 				if (reset_head(&oid, "checkout",
>      --					       options.head_name, 0,
>      -+					       options.head_name, 0, 0,
>      - 					       NULL, NULL) < 0) {
>      - 					ret = !!error(_("could not switch to "
>      - 							"%s"),
>      -@@
>      - 			 "it...\n"));
>      - 
>      - 	strbuf_addf(&msg, "rebase: checkout %s", options.onto_name);
>      --	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1,
>      -+	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1, 0,
>      - 	    NULL, msg.buf))
>      - 		die(_("Could not detach HEAD"));
>      - 	strbuf_release(&msg);
>      -@@
>      - 		strbuf_addf(&msg, "rebase finished: %s onto %s",
>      - 			options.head_name ? options.head_name : "detached HEAD",
>      - 			oid_to_hex(&options.onto->object.oid));
>      --		reset_head(NULL, "Fast-forwarded", options.head_name, 0,
>      -+		reset_head(NULL, "Fast-forwarded", options.head_name, 0, 0,
>      - 			   "HEAD", msg.buf);
>      - 		strbuf_release(&msg);
>      - 		ret = !!finish_rebase(&options);
> 
> -- 
> gitgitgadget
>