diff mbox series

[RFC,1/4] name-rev: improve name_rev() memory usage

Message ID 20190301175024.17337-2-alban.gruin@gmail.com (mailing list archive)
State New, archived
Headers show
Series name-rev: improve memory usage | expand

Commit Message

Alban Gruin March 1, 2019, 5:50 p.m. UTC
name_rev() is a recursive function.  For each commit, it allocates the
name of its parents, and call itself.  A parent may not use a name for
multiple reasons, but in any case, the name will not be released.  On a
repository with a lot of branches, tags, remotes, and commits, it can
use more than 2GB of RAM.

To improve the situation, name_rev() now returns a boolean to its caller
indicating if it can release the name.  The caller may free the name if
the commit is too old, or if the new name is not better than the current
name.

There a condition that will always be false here when name_rev() calls
itself for the first parent, but it will become useful when name_rev()
will stop to name commits that are not mentionned in the stdin buffer.
If the current commit should not be named, its parents may have to be,
but they may not.  In this case, name_rev() will tell to its caller that
the current commit and its first parent has not used the name, and that
it can be released.  However, if the current commit has been named but
not its parent, or the reverse, the name will not be released.

Signed-off-by: Alban Gruin <alban.gruin@gmail.com>
---
 builtin/name-rev.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

Comments

Eric Sunshine March 1, 2019, 6 p.m. UTC | #1
On Fri, Mar 1, 2019 at 12:50 PM Alban Gruin <alban.gruin@gmail.com> wrote:
> There a condition that will always be false here when name_rev() calls
> itself for the first parent, but it will become useful when name_rev()
> will stop to name commits that are not mentionned in the stdin buffer.

s/mentionned/mentioned/

> If the current commit should not be named, its parents may have to be,
> but they may not.  In this case, name_rev() will tell to its caller that

s/tell/report/

> the current commit and its first parent has not used the name, and that
> it can be released.  However, if the current commit has been named but
> not its parent, or the reverse, the name will not be released.
>
> Signed-off-by: Alban Gruin <alban.gruin@gmail.com>
Jeff King March 1, 2019, 6:44 p.m. UTC | #2
On Fri, Mar 01, 2019 at 06:50:21PM +0100, Alban Gruin wrote:

> name_rev() is a recursive function.  For each commit, it allocates the
> name of its parents, and call itself.  A parent may not use a name for
> multiple reasons, but in any case, the name will not be released.  On a
> repository with a lot of branches, tags, remotes, and commits, it can
> use more than 2GB of RAM.

I don't know name-rev that well, so I didn't look super carefully at
your patch. But if it's recursive in history, that's almost certainly
going to be a problem for some histories on some platforms. We've had to
de-recursify several other algorithms (e.g., "tag --contains") because
of stack space issues.

Which isn't to say what you're doing here might not be separately
useful, but just know that in the long run that's probably where we'd
need to take this.

-Peff
Johannes Schindelin March 2, 2019, 9:28 p.m. UTC | #3
Hi Alban,

On Fri, 1 Mar 2019, Alban Gruin wrote:

> name_rev() is a recursive function.  For each commit, it allocates the
> name of its parents, and call itself.  A parent may not use a name for
> multiple reasons, but in any case, the name will not be released.  On a
> repository with a lot of branches, tags, remotes, and commits, it can
> use more than 2GB of RAM.
> 
> To improve the situation, name_rev() now returns a boolean to its caller
> indicating if it can release the name.  The caller may free the name if
> the commit is too old, or if the new name is not better than the current
> name.
> 
> There a condition that will always be false here when name_rev() calls
> itself for the first parent, but it will become useful when name_rev()
> will stop to name commits that are not mentionned in the stdin buffer.
> If the current commit should not be named, its parents may have to be,
> but they may not.  In this case, name_rev() will tell to its caller that
> the current commit and its first parent has not used the name, and that
> it can be released.  However, if the current commit has been named but
> not its parent, or the reverse, the name will not be released.

Makes sense, and the patch looks mostly good to me, just one suggestion:

> Signed-off-by: Alban Gruin <alban.gruin@gmail.com>
> ---
>  builtin/name-rev.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/builtin/name-rev.c b/builtin/name-rev.c
> index f1cb45c227..0719a9388d 100644
> --- a/builtin/name-rev.c
> +++ b/builtin/name-rev.c
> @@ -77,7 +77,7 @@ static int is_better_name(struct rev_name *name,
>  	return 0;
>  }
>  
> -static void name_rev(struct commit *commit,
> +static int name_rev(struct commit *commit,
>  		const char *tip_name, timestamp_t taggerdate,
>  		int generation, int distance, int from_tag,
>  		int deref)
> @@ -86,11 +86,12 @@ static void name_rev(struct commit *commit,
>  	struct commit_list *parents;
>  	int parent_number = 1;
>  	char *to_free = NULL;
> +	int free_alloc = 1;
>  
>  	parse_commit(commit);
>  
>  	if (commit->date < cutoff)
> -		return;
> +		return 1;
>  
>  	if (deref) {
>  		tip_name = to_free = xstrfmt("%s^0", tip_name);
> @@ -111,9 +112,10 @@ static void name_rev(struct commit *commit,
>  		name->generation = generation;
>  		name->distance = distance;
>  		name->from_tag = from_tag;
> +		free_alloc = 0;
>  	} else {
>  		free(to_free);
> -		return;
> +		return 1;
>  	}
>  
>  	for (parents = commit->parents;
> @@ -131,15 +133,18 @@ static void name_rev(struct commit *commit,
>  				new_name = xstrfmt("%.*s^%d", (int)len, tip_name,
>  						   parent_number);
>  
> -			name_rev(parents->item, new_name, taggerdate, 0,
> -				 distance + MERGE_TRAVERSAL_WEIGHT,
> -				 from_tag, 0);
> +			if (name_rev(parents->item, new_name, taggerdate, 0,
> +				      distance + MERGE_TRAVERSAL_WEIGHT,
> +				      from_tag, 0))
> +				free(new_name);
>  		} else {
> -			name_rev(parents->item, tip_name, taggerdate,
> -				 generation + 1, distance + 1,
> -				 from_tag, 0);
> +			free_alloc &= name_rev(parents->item, tip_name, taggerdate,
> +					       generation + 1, distance + 1,
> +					       from_tag, 0);

This would be easier to read if it avoided the &=, e.g. by turning it
into:

		} else if (!name_rev(parents->item, tip_name, taggerdate,
				     generation + 1, distance + 1,
				     from_tag, 0))
			free_alloc = 0;

Ciao,
Dscho

>  		}
>  	}
> +
> +	return free_alloc;
>  }
>  
>  static int subpath_matches(const char *path, const char *filter)
> -- 
> 2.20.1
> 
>
diff mbox series

Patch

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index f1cb45c227..0719a9388d 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -77,7 +77,7 @@  static int is_better_name(struct rev_name *name,
 	return 0;
 }
 
-static void name_rev(struct commit *commit,
+static int name_rev(struct commit *commit,
 		const char *tip_name, timestamp_t taggerdate,
 		int generation, int distance, int from_tag,
 		int deref)
@@ -86,11 +86,12 @@  static void name_rev(struct commit *commit,
 	struct commit_list *parents;
 	int parent_number = 1;
 	char *to_free = NULL;
+	int free_alloc = 1;
 
 	parse_commit(commit);
 
 	if (commit->date < cutoff)
-		return;
+		return 1;
 
 	if (deref) {
 		tip_name = to_free = xstrfmt("%s^0", tip_name);
@@ -111,9 +112,10 @@  static void name_rev(struct commit *commit,
 		name->generation = generation;
 		name->distance = distance;
 		name->from_tag = from_tag;
+		free_alloc = 0;
 	} else {
 		free(to_free);
-		return;
+		return 1;
 	}
 
 	for (parents = commit->parents;
@@ -131,15 +133,18 @@  static void name_rev(struct commit *commit,
 				new_name = xstrfmt("%.*s^%d", (int)len, tip_name,
 						   parent_number);
 
-			name_rev(parents->item, new_name, taggerdate, 0,
-				 distance + MERGE_TRAVERSAL_WEIGHT,
-				 from_tag, 0);
+			if (name_rev(parents->item, new_name, taggerdate, 0,
+				      distance + MERGE_TRAVERSAL_WEIGHT,
+				      from_tag, 0))
+				free(new_name);
 		} else {
-			name_rev(parents->item, tip_name, taggerdate,
-				 generation + 1, distance + 1,
-				 from_tag, 0);
+			free_alloc &= name_rev(parents->item, tip_name, taggerdate,
+					       generation + 1, distance + 1,
+					       from_tag, 0);
 		}
 	}
+
+	return free_alloc;
 }
 
 static int subpath_matches(const char *path, const char *filter)