diff mbox series

[2/6] fast-import: directly use strbufs for paths

Message ID 20240322000304.76810-3-thalia@archibald.dev (mailing list archive)
State Superseded
Headers show
Series fast-import: tighten parsing of paths | expand

Commit Message

Thalia Archibald March 22, 2024, 12:03 a.m. UTC
Previously, one case would not write the path to the strbuf: when the
path is unquoted and at the end of the string. It was essentially
copy-on-write. However, with the logic simplification of the previous
commit, this case was eliminated and the strbuf is always populated.

Directly use the strbufs now instead of an alias.

Since this already changes all the lines that use the strbufs, rename
them from `uq` to be more descriptive. That they are unquoted is not
their most important property, so name them after what they carry.

Additionally, `file_change_m` no longer needs to copy the path before
reading inline data.

Signed-off-by: Thalia Archibald <thalia@archibald.dev>
---
 builtin/fast-import.c | 54 ++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 32 deletions(-)

Comments

Patrick Steinhardt March 28, 2024, 8:21 a.m. UTC | #1
On Fri, Mar 22, 2024 at 12:03:25AM +0000, Thalia Archibald wrote:
> Previously, one case would not write the path to the strbuf: when the
> path is unquoted and at the end of the string. It was essentially
> copy-on-write. However, with the logic simplification of the previous
> commit, this case was eliminated and the strbuf is always populated.
> 
> Directly use the strbufs now instead of an alias.
> 
> Since this already changes all the lines that use the strbufs, rename
> them from `uq` to be more descriptive. That they are unquoted is not
> their most important property, so name them after what they carry.
> 
> Additionally, `file_change_m` no longer needs to copy the path before
> reading inline data.
> 
> Signed-off-by: Thalia Archibald <thalia@archibald.dev>
> ---
>  builtin/fast-import.c | 54 ++++++++++++++++++-------------------------
>  1 file changed, 22 insertions(+), 32 deletions(-)
> 
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index b2adec8d9a..1b3d6784c1 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -2322,7 +2322,7 @@ static void parse_path_space(struct strbuf *sb, const char *p, const char **endp
>  
>  static void file_change_m(const char *p, struct branch *b)
>  {
> -	static struct strbuf uq = STRBUF_INIT;
> +	static struct strbuf path = STRBUF_INIT;

I was about to propose that we should likely also change all of these
static variables to be local instead. I don't think that we use the
variables after the function calls. But now that I see that we do it
like this in all of these helpers I think what's going on is that this
is a memory optimization to avoid reallocating buffers all the time.

Ugly, but so be it. We could refactor the code to pass in scratch
buffers from the outside to remove those static variables. But that
certainly would be a bigger change and thus likely outside of the scope
of this patch series.

Patrick

>  	struct object_entry *oe;
>  	struct object_id oid;
>  	uint16_t mode, inline_data = 0;
> @@ -2359,12 +2359,11 @@ static void file_change_m(const char *p, struct branch *b)
>  			die("Missing space after SHA1: %s", command_buf.buf);
>  	}
>  
> -	parse_path_eol(&uq, p, "path");
> -	p = uq.buf;
> +	parse_path_eol(&path, p, "path");
>  
>  	/* Git does not track empty, non-toplevel directories. */
> -	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *p) {
> -		tree_content_remove(&b->branch_tree, p, NULL, 0);
> +	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) {
> +		tree_content_remove(&b->branch_tree, path.buf, NULL, 0);
>  		return;
>  	}
>  
> @@ -2385,10 +2384,6 @@ static void file_change_m(const char *p, struct branch *b)
>  		if (S_ISDIR(mode))
>  			die("Directories cannot be specified 'inline': %s",
>  				command_buf.buf);
> -		if (p != uq.buf) {
> -			strbuf_addstr(&uq, p);
> -			p = uq.buf;
> -		}
>  		while (read_next_command() != EOF) {
>  			const char *v;
>  			if (skip_prefix(command_buf.buf, "cat-blob ", &v))
> @@ -2414,49 +2409,45 @@ static void file_change_m(const char *p, struct branch *b)
>  				command_buf.buf);
>  	}
>  
> -	if (!*p) {
> +	if (!*path.buf) {
>  		tree_content_replace(&b->branch_tree, &oid, mode, NULL);
>  		return;
>  	}
> -	tree_content_set(&b->branch_tree, p, &oid, mode, NULL);
> +	tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
>  }
>  
>  static void file_change_d(const char *p, struct branch *b)
>  {
> -	static struct strbuf uq = STRBUF_INIT;
> +	static struct strbuf path = STRBUF_INIT;
>  
> -	parse_path_eol(&uq, p, "path");
> -	p = uq.buf;
> -	tree_content_remove(&b->branch_tree, p, NULL, 1);
> +	parse_path_eol(&path, p, "path");
> +	tree_content_remove(&b->branch_tree, path.buf, NULL, 1);
>  }
>  
>  static void file_change_cr(const char *p, struct branch *b, int rename)
>  {
> -	const char *s, *d;
> -	static struct strbuf s_uq = STRBUF_INIT;
> -	static struct strbuf d_uq = STRBUF_INIT;
> +	static struct strbuf source = STRBUF_INIT;
> +	static struct strbuf dest = STRBUF_INIT;
>  	struct tree_entry leaf;
>  
> -	parse_path_space(&s_uq, p, &p, "source");
> -	parse_path_eol(&d_uq, p, "dest");
> -	s = s_uq.buf;
> -	d = d_uq.buf;
> +	parse_path_space(&source, p, &p, "source");
> +	parse_path_eol(&dest, p, "dest");
>  
>  	memset(&leaf, 0, sizeof(leaf));
>  	if (rename)
> -		tree_content_remove(&b->branch_tree, s, &leaf, 1);
> +		tree_content_remove(&b->branch_tree, source.buf, &leaf, 1);
>  	else
> -		tree_content_get(&b->branch_tree, s, &leaf, 1);
> +		tree_content_get(&b->branch_tree, source.buf, &leaf, 1);
>  	if (!leaf.versions[1].mode)
> -		die("Path %s not in branch", s);
> -	if (!*d) {	/* C "path/to/subdir" "" */
> +		die("Path %s not in branch", source.buf);
> +	if (!*dest.buf) {	/* C "path/to/subdir" "" */
>  		tree_content_replace(&b->branch_tree,
>  			&leaf.versions[1].oid,
>  			leaf.versions[1].mode,
>  			leaf.tree);
>  		return;
>  	}
> -	tree_content_set(&b->branch_tree, d,
> +	tree_content_set(&b->branch_tree, dest.buf,
>  		&leaf.versions[1].oid,
>  		leaf.versions[1].mode,
>  		leaf.tree);
> @@ -3186,7 +3177,7 @@ static void parse_ls(const char *p, struct branch *b)
>  {
>  	struct tree_entry *root = NULL;
>  	struct tree_entry leaf = {NULL};
> -	static struct strbuf uq = STRBUF_INIT;
> +	static struct strbuf path = STRBUF_INIT;
>  
>  	/* ls SP (<tree-ish> SP)? <path> */
>  	if (*p == '"') {
> @@ -3201,9 +3192,8 @@ static void parse_ls(const char *p, struct branch *b)
>  			root->versions[1].mode = S_IFDIR;
>  		load_tree(root);
>  	}
> -	parse_path_eol(&uq, p, "path");
> -	p = uq.buf;
> -	tree_content_get(root, p, &leaf, 1);
> +	parse_path_eol(&path, p, "path");
> +	tree_content_get(root, path.buf, &leaf, 1);
>  	/*
>  	 * A directory in preparation would have a sha1 of zero
>  	 * until it is saved.  Save, for simplicity.
> @@ -3211,7 +3201,7 @@ static void parse_ls(const char *p, struct branch *b)
>  	if (S_ISDIR(leaf.versions[1].mode))
>  		store_tree(&leaf);
>  
> -	print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, p);
> +	print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, path.buf);
>  	if (leaf.tree)
>  		release_tree_content_recursive(leaf.tree);
>  	if (!b || root != &b->branch_tree)
> -- 
> 2.44.0
> 
> 
>
diff mbox series

Patch

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index b2adec8d9a..1b3d6784c1 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -2322,7 +2322,7 @@  static void parse_path_space(struct strbuf *sb, const char *p, const char **endp
 
 static void file_change_m(const char *p, struct branch *b)
 {
-	static struct strbuf uq = STRBUF_INIT;
+	static struct strbuf path = STRBUF_INIT;
 	struct object_entry *oe;
 	struct object_id oid;
 	uint16_t mode, inline_data = 0;
@@ -2359,12 +2359,11 @@  static void file_change_m(const char *p, struct branch *b)
 			die("Missing space after SHA1: %s", command_buf.buf);
 	}
 
-	parse_path_eol(&uq, p, "path");
-	p = uq.buf;
+	parse_path_eol(&path, p, "path");
 
 	/* Git does not track empty, non-toplevel directories. */
-	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *p) {
-		tree_content_remove(&b->branch_tree, p, NULL, 0);
+	if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) {
+		tree_content_remove(&b->branch_tree, path.buf, NULL, 0);
 		return;
 	}
 
@@ -2385,10 +2384,6 @@  static void file_change_m(const char *p, struct branch *b)
 		if (S_ISDIR(mode))
 			die("Directories cannot be specified 'inline': %s",
 				command_buf.buf);
-		if (p != uq.buf) {
-			strbuf_addstr(&uq, p);
-			p = uq.buf;
-		}
 		while (read_next_command() != EOF) {
 			const char *v;
 			if (skip_prefix(command_buf.buf, "cat-blob ", &v))
@@ -2414,49 +2409,45 @@  static void file_change_m(const char *p, struct branch *b)
 				command_buf.buf);
 	}
 
-	if (!*p) {
+	if (!*path.buf) {
 		tree_content_replace(&b->branch_tree, &oid, mode, NULL);
 		return;
 	}
-	tree_content_set(&b->branch_tree, p, &oid, mode, NULL);
+	tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
 }
 
 static void file_change_d(const char *p, struct branch *b)
 {
-	static struct strbuf uq = STRBUF_INIT;
+	static struct strbuf path = STRBUF_INIT;
 
-	parse_path_eol(&uq, p, "path");
-	p = uq.buf;
-	tree_content_remove(&b->branch_tree, p, NULL, 1);
+	parse_path_eol(&path, p, "path");
+	tree_content_remove(&b->branch_tree, path.buf, NULL, 1);
 }
 
 static void file_change_cr(const char *p, struct branch *b, int rename)
 {
-	const char *s, *d;
-	static struct strbuf s_uq = STRBUF_INIT;
-	static struct strbuf d_uq = STRBUF_INIT;
+	static struct strbuf source = STRBUF_INIT;
+	static struct strbuf dest = STRBUF_INIT;
 	struct tree_entry leaf;
 
-	parse_path_space(&s_uq, p, &p, "source");
-	parse_path_eol(&d_uq, p, "dest");
-	s = s_uq.buf;
-	d = d_uq.buf;
+	parse_path_space(&source, p, &p, "source");
+	parse_path_eol(&dest, p, "dest");
 
 	memset(&leaf, 0, sizeof(leaf));
 	if (rename)
-		tree_content_remove(&b->branch_tree, s, &leaf, 1);
+		tree_content_remove(&b->branch_tree, source.buf, &leaf, 1);
 	else
-		tree_content_get(&b->branch_tree, s, &leaf, 1);
+		tree_content_get(&b->branch_tree, source.buf, &leaf, 1);
 	if (!leaf.versions[1].mode)
-		die("Path %s not in branch", s);
-	if (!*d) {	/* C "path/to/subdir" "" */
+		die("Path %s not in branch", source.buf);
+	if (!*dest.buf) {	/* C "path/to/subdir" "" */
 		tree_content_replace(&b->branch_tree,
 			&leaf.versions[1].oid,
 			leaf.versions[1].mode,
 			leaf.tree);
 		return;
 	}
-	tree_content_set(&b->branch_tree, d,
+	tree_content_set(&b->branch_tree, dest.buf,
 		&leaf.versions[1].oid,
 		leaf.versions[1].mode,
 		leaf.tree);
@@ -3186,7 +3177,7 @@  static void parse_ls(const char *p, struct branch *b)
 {
 	struct tree_entry *root = NULL;
 	struct tree_entry leaf = {NULL};
-	static struct strbuf uq = STRBUF_INIT;
+	static struct strbuf path = STRBUF_INIT;
 
 	/* ls SP (<tree-ish> SP)? <path> */
 	if (*p == '"') {
@@ -3201,9 +3192,8 @@  static void parse_ls(const char *p, struct branch *b)
 			root->versions[1].mode = S_IFDIR;
 		load_tree(root);
 	}
-	parse_path_eol(&uq, p, "path");
-	p = uq.buf;
-	tree_content_get(root, p, &leaf, 1);
+	parse_path_eol(&path, p, "path");
+	tree_content_get(root, path.buf, &leaf, 1);
 	/*
 	 * A directory in preparation would have a sha1 of zero
 	 * until it is saved.  Save, for simplicity.
@@ -3211,7 +3201,7 @@  static void parse_ls(const char *p, struct branch *b)
 	if (S_ISDIR(leaf.versions[1].mode))
 		store_tree(&leaf);
 
-	print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, p);
+	print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, path.buf);
 	if (leaf.tree)
 		release_tree_content_recursive(leaf.tree);
 	if (!b || root != &b->branch_tree)