diff mbox series

[09/10] name-rev: generate name strings only if they are better

Message ID 77d1d053-8680-5cbe-9182-b6aec9e9b446@web.de (mailing list archive)
State New, archived
Headers show
Series name-rev: improve memory usage | expand

Commit Message

René Scharfe Feb. 4, 2020, 9:25 p.m. UTC
Leave setting the tip_name member of struct rev_name to callers of
create_or_update_name().  This avoids allocations for names that are
rejected by that function.  Here's how this affects the runtime when
working with a fresh clone of Git's own repository; performance numbers
by hyperfine before:

Benchmark #1: ./git -C ../git-pristine/ name-rev --all
  Time (mean ± σ):     437.8 ms ±   4.0 ms    [User: 422.5 ms, System: 15.2 ms]
  Range (min … max):   432.8 ms … 446.3 ms    10 runs

... and with this patch:

Benchmark #1: ./git -C ../git-pristine/ name-rev --all
  Time (mean ± σ):     408.5 ms ±   1.4 ms    [User: 387.2 ms, System: 21.2 ms]
  Range (min … max):   407.1 ms … 411.7 ms    10 runs

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 builtin/name-rev.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

--
2.25.0

Comments

Derrick Stolee Feb. 5, 2020, 3:11 p.m. UTC | #1
On 2/4/2020 4:25 PM, René Scharfe wrote:
> -			if (create_or_update_name(parent, new_name, taggerdate,
> -						  generation, distance,
> -						  from_tag)) {
> +			parent_name = create_or_update_name(parent, taggerdate,
> +							    generation,
> +							    distance, from_tag);
> +			if (parent_name) {

As someone unfamiliar with the name-rev code, it took me a while to see why
the algorithm isn't exponential in complexity. It technically _is_, but it
is of the form 2^{N / MERGE_TRAVERSAL_WEIGHT} = 2^{N / 65535} and only if
we create a particularly nasty commit history that would never appear in the wild.

But, the critical section is the block above. The confusing part was that
create_or_update_name() returns NULL if the name is updated, but non-NULL if
a better name is created. That makes it clear that this change is saving
allocations.

Thanks,
-Stolee
René Scharfe Feb. 5, 2020, 3:50 p.m. UTC | #2
Am 05.02.20 um 16:11 schrieb Derrick Stolee:
> On 2/4/2020 4:25 PM, René Scharfe wrote:
>> -			if (create_or_update_name(parent, new_name, taggerdate,
>> -						  generation, distance,
>> -						  from_tag)) {
>> +			parent_name = create_or_update_name(parent, taggerdate,
>> +							    generation,
>> +							    distance, from_tag);
>> +			if (parent_name) {
>
> As someone unfamiliar with the name-rev code, it took me a while to see why
> the algorithm isn't exponential in complexity. It technically _is_, but it
> is of the form 2^{N / MERGE_TRAVERSAL_WEIGHT} = 2^{N / 65535} and only if
> we create a particularly nasty commit history that would never appear in the wild.

As I understand it, name_rev() attaches a name (a tag or other reference) to a
commit, and then goes through all its ancestors, depth first, and attaches a
name based on that to all of them as well.  It stops traversal if the name is
not better than a previously attached name.  Each commit has at most one name.

If we have N commits and M tags then we could traverse N * M commits and set
their names in the worst case, if the M tags are all for the very latest
commit and if the tags are ordered from worst to best.  Right?

Which implies that ordering names before attaching them one by one should
yield another speedup.  Older tags are preferred and should be tried first,
followed by younger tags and non-tags.  Hmm.. :)

> But, the critical section is the block above. The confusing part was that
> create_or_update_name() returns NULL if the name is updated, but non-NULL if
> a better name is created. That makes it clear that this change is saving
> allocations.

create_or_update_name() returns NULL if the new name is not better than an
existing one, and the old name is left untouched.  It returns a pointer to
the name record if there either was no old name or if that name was worse.
In that case it updates the name.

After this patch it doesn't set the name string (tip_name) anymore, which
is left for the caller.  And the benefit is that the caller only needs to
prepare said name string if the new name is better.

René
diff mbox series

Patch

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 793356edd1..98f55bcea9 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -81,7 +81,6 @@  static int is_better_name(struct rev_name *name,
 }

 static struct rev_name *create_or_update_name(struct commit *commit,
-					      const char *tip_name,
 					      timestamp_t taggerdate,
 					      int generation, int distance,
 					      int from_tag)
@@ -92,7 +91,6 @@  static struct rev_name *create_or_update_name(struct commit *commit,
 	    !is_better_name(name, taggerdate, distance, from_tag))
 		return NULL;

-	name->tip_name = tip_name;
 	name->taggerdate = taggerdate;
 	name->generation = generation;
 	name->distance = distance;
@@ -130,22 +128,20 @@  static void name_rev(struct commit *start_commit,
 	struct commit *commit;
 	struct commit **parents_to_queue = NULL;
 	size_t parents_to_queue_nr, parents_to_queue_alloc = 0;
-	char *to_free = NULL;
+	struct rev_name *start_name;

 	parse_commit(start_commit);
 	if (start_commit->date < cutoff)
 		return;

+	start_name = create_or_update_name(start_commit, taggerdate, 0, 0,
+					   from_tag);
+	if (!start_name)
+		return;
 	if (deref)
-		tip_name = to_free = xstrfmt("%s^0", tip_name);
+		start_name->tip_name = xstrfmt("%s^0", tip_name);
 	else
-		tip_name = to_free = xstrdup(tip_name);
-
-	if (!create_or_update_name(start_commit, tip_name, taggerdate, 0, 0,
-				   from_tag)) {
-		free(to_free);
-		return;
-	}
+		start_name->tip_name = xstrdup(tip_name);

 	memset(&queue, 0, sizeof(queue)); /* Use the prio_queue as LIFO */
 	prio_queue_put(&queue, start_commit);
@@ -161,7 +157,7 @@  static void name_rev(struct commit *start_commit,
 				parents;
 				parents = parents->next, parent_number++) {
 			struct commit *parent = parents->item;
-			const char *new_name;
+			struct rev_name *parent_name;
 			int generation, distance;

 			parse_commit(parent);
@@ -169,18 +165,23 @@  static void name_rev(struct commit *start_commit,
 				continue;

 			if (parent_number > 1) {
-				new_name = get_parent_name(name, parent_number);
 				generation = 0;
 				distance = name->distance + MERGE_TRAVERSAL_WEIGHT;
 			} else {
-				new_name = name->tip_name;
 				generation = name->generation + 1;
 				distance = name->distance + 1;
 			}

-			if (create_or_update_name(parent, new_name, taggerdate,
-						  generation, distance,
-						  from_tag)) {
+			parent_name = create_or_update_name(parent, taggerdate,
+							    generation,
+							    distance, from_tag);
+			if (parent_name) {
+				if (parent_number > 1)
+					parent_name->tip_name =
+						get_parent_name(name,
+								parent_number);
+				else
+					parent_name->tip_name = name->tip_name;
 				ALLOC_GROW(parents_to_queue,
 					   parents_to_queue_nr + 1,
 					   parents_to_queue_alloc);