diff mbox series

[v2,3/7] subtree: persist cache between split runs

Message ID 8eec18388c86071db47512b84118e3b9111bd34d.1602021913.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series subtree: Fix handling of complex history | expand

Commit Message

John Cai via GitGitGadget Oct. 6, 2020, 10:05 p.m. UTC
From: Tom Clarkson <tom@tqclarkson.com>

Provide a mechanism for handling problematic commits. If the algorithm
in process_split_commit is getting something wrong, you can write a
corrected value to the cache before running split.

Signed-off-by: Tom Clarkson <tom@tqclarkson.com>
---
 contrib/subtree/git-subtree.sh | 37 ++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

Comments

Johannes Schindelin Oct. 7, 2020, 4:06 p.m. UTC | #1
Hi Tom,

On Tue, 6 Oct 2020, Tom Clarkson via GitGitGadget wrote:

> @@ -48,6 +49,7 @@ annotate=
>  squash=
>  message=
>  prefix=
> +clearcache=

It might be more consistent to call it `clear_cache` (i.e. with an
underscore), just like `ignore_joins`.

>
>  debug () {
>  	if test -n "$debug"
> @@ -131,6 +133,9 @@ do
>  	--no-rejoin)
>  		rejoin=
>  		;;
> +	--clear-cache)
> +		clearcache=1
> +		;;
>  	--ignore-joins)
>  		ignore_joins=1
>  		;;
> @@ -206,9 +211,13 @@ debug "opts: {$*}"
>  debug
>
>  cache_setup () {
> -	cachedir="$GIT_DIR/subtree-cache/$$"
> -	rm -rf "$cachedir" ||
> -		die "Can't delete old cachedir: $cachedir"
> +	cachedir="$GIT_DIR/subtree-cache/$prefix"

Excellent, the `prefix` should be "unique enough".

> +	if test -n "$clearcache"
> +	then
> +		debug "Clearing cache"
> +		rm -rf "$cachedir" ||
> +			die "Can't delete old cachedir: $cachedir"
> +	fi
>  	mkdir -p "$cachedir" ||
>  		die "Can't create new cachedir: $cachedir"
>  	mkdir -p "$cachedir/notree" ||
> @@ -266,6 +275,16 @@ cache_set () {
>  	echo "$newrev" >"$cachedir/$oldrev"
>  }
>
> +cache_set_if_unset () {
> +	oldrev="$1"
> +	newrev="$2"

`local`? ;-)

> +	if test -e "$cachedir/$oldrev"
> +	then
> +		return
> +	fi
> +	echo "$newrev" >"$cachedir/$oldrev"

So that directory contains commit mappings, a file for each mapped
revision.

Thinking back to patch 2/11, I am now no longer that sure that it makes
sense to fill it up with every commit in that commit range: performance
suffers when directories contain too many files.

For example, I had a case in the past where it took a minute just to
enumerate a directory, and even looking whether a file existed in that
directory was not exactly fun.

In any case, I would write it slightly shorter:

	test -e "$cachedir/$oldrev" ||
	echo "$newrev" >"$cachedir/$oldrev"

> +}
> +
>  rev_exists () {
>  	if git rev-parse "$1" >/dev/null 2>&1
>  	then
> @@ -375,13 +394,13 @@ find_existing_splits () {
>  			then
>  				# squash commits refer to a subtree
>  				debug "  Squash: $sq from $sub"
> -				cache_set "$sq" "$sub"
> +				cache_set_if_unset "$sq" "$sub"
>  			fi
>  			if test -n "$main" -a -n "$sub"
>  			then
>  				debug "  Prior: $main -> $sub"
> -				cache_set $main $sub
> -				cache_set $sub $sub
> +				cache_set_if_unset $main $sub
> +				cache_set_if_unset $sub $sub
>  				try_remove_previous "$main"
>  				try_remove_previous "$sub"
>  			fi
> @@ -688,6 +707,8 @@ process_split_commit () {
>  		if test -n "$newparents"
>  		then
>  			cache_set "$rev" "$rev"
> +		else
> +			cache_set "$rev" ""

Was this hunk intended to be snuck in here? I can understand the
s/cache_set/cache_set_if_unset/ changes, of course, but not this hunk.

>  		fi
>  		return
>  	fi
> @@ -785,7 +806,7 @@ cmd_split () {
>  			# the 'onto' history is already just the subdir, so
>  			# any parent we find there can be used verbatim
>  			debug "  cache: $rev"
> -			cache_set "$rev" "$rev"
> +			cache_set_if_unset "$rev" "$rev"
>  		done
>  	fi
>
> @@ -798,7 +819,7 @@ cmd_split () {
>  		git rev-list --topo-order --skip=1 $mainline |
>  		while read rev
>  		do
> -			cache_set "$rev" ""
> +			cache_set_if_unset "$rev" ""

Okay. A quite interesting question now would be: are there any callers of
`cache_set` left? If so, why?

Thanks,
Dscho

>  		done || exit $?
>  	fi
>
> --
> gitgitgadget
>
>
diff mbox series

Patch

diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh
index 160bad95c1..c21d620610 100755
--- a/contrib/subtree/git-subtree.sh
+++ b/contrib/subtree/git-subtree.sh
@@ -27,6 +27,7 @@  b,branch=     create a new branch from the split subtree
 ignore-joins  ignore prior --rejoin commits
 onto=         try connecting new tree to an existing one
 rejoin        merge the new branch back into HEAD
+clear-cache   reset the subtree mapping cache
  options for 'add', 'merge', and 'pull'
 squash        merge subtree changes as a single commit
 "
@@ -48,6 +49,7 @@  annotate=
 squash=
 message=
 prefix=
+clearcache=
 
 debug () {
 	if test -n "$debug"
@@ -131,6 +133,9 @@  do
 	--no-rejoin)
 		rejoin=
 		;;
+	--clear-cache)
+		clearcache=1
+		;;
 	--ignore-joins)
 		ignore_joins=1
 		;;
@@ -206,9 +211,13 @@  debug "opts: {$*}"
 debug
 
 cache_setup () {
-	cachedir="$GIT_DIR/subtree-cache/$$"
-	rm -rf "$cachedir" ||
-		die "Can't delete old cachedir: $cachedir"
+	cachedir="$GIT_DIR/subtree-cache/$prefix"
+	if test -n "$clearcache"
+	then
+		debug "Clearing cache"
+		rm -rf "$cachedir" ||
+			die "Can't delete old cachedir: $cachedir"
+	fi
 	mkdir -p "$cachedir" ||
 		die "Can't create new cachedir: $cachedir"
 	mkdir -p "$cachedir/notree" ||
@@ -266,6 +275,16 @@  cache_set () {
 	echo "$newrev" >"$cachedir/$oldrev"
 }
 
+cache_set_if_unset () {
+	oldrev="$1"
+	newrev="$2"
+	if test -e "$cachedir/$oldrev"
+	then
+		return
+	fi
+	echo "$newrev" >"$cachedir/$oldrev"
+}
+
 rev_exists () {
 	if git rev-parse "$1" >/dev/null 2>&1
 	then
@@ -375,13 +394,13 @@  find_existing_splits () {
 			then
 				# squash commits refer to a subtree
 				debug "  Squash: $sq from $sub"
-				cache_set "$sq" "$sub"
+				cache_set_if_unset "$sq" "$sub"
 			fi
 			if test -n "$main" -a -n "$sub"
 			then
 				debug "  Prior: $main -> $sub"
-				cache_set $main $sub
-				cache_set $sub $sub
+				cache_set_if_unset $main $sub
+				cache_set_if_unset $sub $sub
 				try_remove_previous "$main"
 				try_remove_previous "$sub"
 			fi
@@ -688,6 +707,8 @@  process_split_commit () {
 		if test -n "$newparents"
 		then
 			cache_set "$rev" "$rev"
+		else
+			cache_set "$rev" ""
 		fi
 		return
 	fi
@@ -785,7 +806,7 @@  cmd_split () {
 			# the 'onto' history is already just the subdir, so
 			# any parent we find there can be used verbatim
 			debug "  cache: $rev"
-			cache_set "$rev" "$rev"
+			cache_set_if_unset "$rev" "$rev"
 		done
 	fi
 
@@ -798,7 +819,7 @@  cmd_split () {
 		git rev-list --topo-order --skip=1 $mainline |
 		while read rev
 		do
-			cache_set "$rev" ""
+			cache_set_if_unset "$rev" ""
 		done || exit $?
 	fi