diff mbox series

[3/6] fast-import: release unfreed strbufs

Message ID 20240322000304.76810-4-thalia@archibald.dev (mailing list archive)
State Superseded
Headers show
Series fast-import: tighten parsing of paths | expand

Commit Message

Thalia Archibald March 22, 2024, 12:03 a.m. UTC
These strbufs are owned. Release them at the end of their scopes.

Signed-off-by: Thalia Archibald <thalia@archibald.dev>
---
 builtin/fast-import.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

Comments

Patrick Steinhardt March 28, 2024, 8:21 a.m. UTC | #1
On Fri, Mar 22, 2024 at 12:03:33AM +0000, Thalia Archibald wrote:
> These strbufs are owned. Release them at the end of their scopes.
> 
> Signed-off-by: Thalia Archibald <thalia@archibald.dev>
> ---
>  builtin/fast-import.c | 29 ++++++++++++++++++-----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index 1b3d6784c1..d6f998f363 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -2364,6 +2364,7 @@ static void file_change_m(const char *p, struct branch *b)
>  	/* Git does not track empty, non-toplevel directories. */
>  	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) {
>  		tree_content_remove(&b->branch_tree, path.buf, NULL, 0);
> +		strbuf_release(&path);
>  		return;
>  	}

Oh, now you get to my comment in the preceding patch. With this patch
we're now in a somewhat weird in-between state where the buffers are
still static, but we release their memory after each call. So we kind of
get the worst of both worlds: static variables without being able to
reuse the buffer's memory.

If we were to change this then we should definitely mark the buffers as
non-static. If so, it would be great to demonstrate that this does not
significantly impact performance.

The same is true for all the other instances.

Patrick

> @@ -2409,11 +2410,11 @@ static void file_change_m(const char *p, struct branch *b)
>  				command_buf.buf);
>  	}
>  
> -	if (!*path.buf) {
> +	if (*path.buf)
> +		tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
> +	else
>  		tree_content_replace(&b->branch_tree, &oid, mode, NULL);
> -		return;
> -	}
> -	tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
> +	strbuf_release(&path);
>  }
>  
>  static void file_change_d(const char *p, struct branch *b)
> @@ -2422,6 +2423,7 @@ static void file_change_d(const char *p, struct branch *b)
>  
>  	parse_path_eol(&path, p, "path");
>  	tree_content_remove(&b->branch_tree, path.buf, NULL, 1);
> +	strbuf_release(&path);
>  }
>  
>  static void file_change_cr(const char *p, struct branch *b, int rename)
> @@ -2440,17 +2442,18 @@ static void file_change_cr(const char *p, struct branch *b, int rename)
>  		tree_content_get(&b->branch_tree, source.buf, &leaf, 1);
>  	if (!leaf.versions[1].mode)
>  		die("Path %s not in branch", source.buf);
> -	if (!*dest.buf) {	/* C "path/to/subdir" "" */
> +	if (*dest.buf)
> +		tree_content_set(&b->branch_tree, dest.buf,
> +			&leaf.versions[1].oid,
> +			leaf.versions[1].mode,
> +			leaf.tree);
> +	else	/* C "path/to/subdir" "" */
>  		tree_content_replace(&b->branch_tree,
>  			&leaf.versions[1].oid,
>  			leaf.versions[1].mode,
>  			leaf.tree);
> -		return;
> -	}
> -	tree_content_set(&b->branch_tree, dest.buf,
> -		&leaf.versions[1].oid,
> -		leaf.versions[1].mode,
> -		leaf.tree);
> +	strbuf_release(&source);
> +	strbuf_release(&dest);
>  }
>  
>  static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout)
> @@ -2804,6 +2807,7 @@ static void parse_new_commit(const char *arg)
>  	free(author);
>  	free(committer);
>  	free(encoding);
> +	strbuf_release(&msg);
>  
>  	if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark))
>  		b->pack_id = pack_id;
> @@ -2886,6 +2890,7 @@ static void parse_new_tag(const char *arg)
>  	strbuf_addch(&new_data, '\n');
>  	strbuf_addbuf(&new_data, &msg);
>  	free(tagger);
> +	strbuf_release(&msg);
>  
>  	if (store_object(OBJ_TAG, &new_data, NULL, &t->oid, next_mark))
>  		t->pack_id = MAX_PACK_ID;
> @@ -3171,6 +3176,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path)
>  		strbuf_addch(&line, '\n');
>  	}
>  	cat_blob_write(line.buf, line.len);
> +	strbuf_release(&line);
>  }
>  
>  static void parse_ls(const char *p, struct branch *b)
> @@ -3206,6 +3212,7 @@ static void parse_ls(const char *p, struct branch *b)
>  		release_tree_content_recursive(leaf.tree);
>  	if (!b || root != &b->branch_tree)
>  		release_tree_entry(root);
> +	strbuf_release(&path);
>  }
>  
>  static void checkpoint(void)
> -- 
> 2.44.0
> 
> 
>
Thalia Archibald April 1, 2024, 9:06 a.m. UTC | #2
(Resending as plain text)

On Mar 28, 2024, at 01:21, Patrick Steinhardt <ps@pks.im> wrote:
> I was about to propose that we should likely also change all of these
> static variables to be local instead. I don't think that we use the
> variables after the function calls. But now that I see that we do it
> like this in all of these helpers I think what's going on is that this
> is a memory optimization to avoid reallocating buffers all the time.
> 
> Ugly, but so be it. We could refactor the code to pass in scratch
> buffers from the outside to remove those static variables. But that
> certainly would be a bigger change and thus likely outside of the scope
> of this patch series.


> Oh, now you get to my comment in the preceding patch. With this patch
> we're now in a somewhat weird in-between state where the buffers are
> still static, but we release their memory after each call. So we kind of
> get the worst of both worlds: static variables without being able to
> reuse the buffer's memory.
> 
> If we were to change this then we should definitely mark the buffers as
> non-static. If so, it would be great to demonstrate that this does not
> significantly impact performance.
> 
> The same is true for all the other instances.

I had glossed over the fact that they're `static`, since I've grown accustomed
to Rust, where this sort of non-reentrant code is discouraged. However, this
pattern is great for fast-import, because all of its data is simply freed when
it exits at the end of the stream. I dropped this patch in v2.

I don't think it's worth hoisting these `strbuf`s out. Doing so would only
reduce the total number of static `strbuf`s for paths from 5 to 2, but would
make ownership less clear.

Thalia
diff mbox series

Patch

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 1b3d6784c1..d6f998f363 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -2364,6 +2364,7 @@  static void file_change_m(const char *p, struct branch *b)
 	/* Git does not track empty, non-toplevel directories. */
 	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) {
 		tree_content_remove(&b->branch_tree, path.buf, NULL, 0);
+		strbuf_release(&path);
 		return;
 	}
 
@@ -2409,11 +2410,11 @@  static void file_change_m(const char *p, struct branch *b)
 				command_buf.buf);
 	}
 
-	if (!*path.buf) {
+	if (*path.buf)
+		tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
+	else
 		tree_content_replace(&b->branch_tree, &oid, mode, NULL);
-		return;
-	}
-	tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
+	strbuf_release(&path);
 }
 
 static void file_change_d(const char *p, struct branch *b)
@@ -2422,6 +2423,7 @@  static void file_change_d(const char *p, struct branch *b)
 
 	parse_path_eol(&path, p, "path");
 	tree_content_remove(&b->branch_tree, path.buf, NULL, 1);
+	strbuf_release(&path);
 }
 
 static void file_change_cr(const char *p, struct branch *b, int rename)
@@ -2440,17 +2442,18 @@  static void file_change_cr(const char *p, struct branch *b, int rename)
 		tree_content_get(&b->branch_tree, source.buf, &leaf, 1);
 	if (!leaf.versions[1].mode)
 		die("Path %s not in branch", source.buf);
-	if (!*dest.buf) {	/* C "path/to/subdir" "" */
+	if (*dest.buf)
+		tree_content_set(&b->branch_tree, dest.buf,
+			&leaf.versions[1].oid,
+			leaf.versions[1].mode,
+			leaf.tree);
+	else	/* C "path/to/subdir" "" */
 		tree_content_replace(&b->branch_tree,
 			&leaf.versions[1].oid,
 			leaf.versions[1].mode,
 			leaf.tree);
-		return;
-	}
-	tree_content_set(&b->branch_tree, dest.buf,
-		&leaf.versions[1].oid,
-		leaf.versions[1].mode,
-		leaf.tree);
+	strbuf_release(&source);
+	strbuf_release(&dest);
 }
 
 static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout)
@@ -2804,6 +2807,7 @@  static void parse_new_commit(const char *arg)
 	free(author);
 	free(committer);
 	free(encoding);
+	strbuf_release(&msg);
 
 	if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark))
 		b->pack_id = pack_id;
@@ -2886,6 +2890,7 @@  static void parse_new_tag(const char *arg)
 	strbuf_addch(&new_data, '\n');
 	strbuf_addbuf(&new_data, &msg);
 	free(tagger);
+	strbuf_release(&msg);
 
 	if (store_object(OBJ_TAG, &new_data, NULL, &t->oid, next_mark))
 		t->pack_id = MAX_PACK_ID;
@@ -3171,6 +3176,7 @@  static void print_ls(int mode, const unsigned char *hash, const char *path)
 		strbuf_addch(&line, '\n');
 	}
 	cat_blob_write(line.buf, line.len);
+	strbuf_release(&line);
 }
 
 static void parse_ls(const char *p, struct branch *b)
@@ -3206,6 +3212,7 @@  static void parse_ls(const char *p, struct branch *b)
 		release_tree_content_recursive(leaf.tree);
 	if (!b || root != &b->branch_tree)
 		release_tree_entry(root);
+	strbuf_release(&path);
 }
 
 static void checkpoint(void)