diff mbox series

[v2,12/17] mktree: create tree using an in-core index

Message ID 2333775ba5bd71766a6aece87e39a6d189aeaead.1718834285.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series mktree: support more flexible usage | expand

Commit Message

Victoria Dye June 19, 2024, 9:58 p.m. UTC
From: Victoria Dye <vdye@github.com>

Rather than manually write out the contents of a tree object file, construct
an in-memory sparse index from the provided tree entries and create the tree
by writing out its corresponding cache tree.

This patch does not change the behavior of the 'mktree' command. However,
constructing the tree this way will substantially simplify future extensions
to the command's functionality, including handling deeper-than-toplevel tree
entries and applying the provided entries to an existing tree.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 builtin/mktree.c | 74 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 19 deletions(-)

Comments

Junio C Hamano June 20, 2024, 10:26 p.m. UTC | #1
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -60,17 +66,25 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
>  	if (literally) {
>  		FLEX_ALLOC_MEM(ent, name, path, len);
>  	} else {
> +		size_t len_to_copy = len;
> +
>  		/* Normalize and validate entry path */
>  		if (S_ISDIR(mode)) {
> -			while(len > 0 && is_dir_sep(path[len - 1]))
> -				len--;
> +			while(len_to_copy > 0 && is_dir_sep(path[len_to_copy - 1]))

Let's fix the style issue while at it, as we are doing other changes
in this step anyway.  "while(" -> "while (".

> +				len_to_copy--;
> +			len = len_to_copy + 1; /* add space for trailing slash */

Do we need to do st_add() here?  Perhaps not, but I just noticed the
careful use of st_add3() below, so...

> +		ent = xcalloc(1, st_add3(sizeof(struct tree_entry), len, 1));

> +		memcpy(ent->name, path, len_to_copy);
>  
>  		if (!verify_path(ent->name, mode))
>  			die(_("invalid path '%s'"), path);
>  		if (strchr(ent->name, '/'))
>  			die("path %s contains slash", path);
> +
> +		/* Add trailing slash to dir */
> +		if (S_ISDIR(mode))
> +			ent->name[len - 1] = '/';

OK.

> @@ -88,11 +102,14 @@ static int ent_compare(const void *a_, const void *b_, void *ctx)
>  	struct tree_entry *b = *(struct tree_entry **)b_;
>  	int ignore_mode = *((int *)ctx);
>  
> -	if (ignore_mode)
> -		cmp = name_compare(a->name, a->len, b->name, b->len);
> -	else
> -		cmp = base_name_compare(a->name, a->len, a->mode,
> -					b->name, b->len, b->mode);
> +	size_t a_len = a->len, b_len = b->len;
> +
> +	if (ignore_mode) {
> +		a_len = df_path_len(a_len, a->mode);
> +		b_len = df_path_len(b_len, b->mode);
> +	}
> +
> +	cmp = name_compare(a->name, a_len, b->name, b_len);
>  	return cmp ? cmp : b->order - a->order;
>  }

OK, now the "mode" is sort of "encoded" already in the "name" by the
slash at the end, the way "ignore-mode" works needs to be redesigned.

If we are ignoring mode, we are dropping the trailing '/' and
otherwise we just feed the name with possible trailing '/', and the
same name_compare() can be used.  OK.

> @@ -108,8 +125,8 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
>  	for (size_t i = 0; i < count; i++) {
>  		struct tree_entry *curr = arr->entries[i];
>  		if (prev &&
> -		    !name_compare(prev->name, prev->len,
> -				  curr->name, curr->len)) {
> +		    !name_compare(prev->name, df_path_len(prev->len, prev->mode),
> +				  curr->name, df_path_len(curr->len, curr->mode))) {
>  			FREE_AND_NULL(curr);

And here is the matching adjustment for the dedup comparison, which
makes sense.

> @@ -122,24 +139,43 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
>  	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
>  }
>  
> +static int add_tree_entry_to_index(struct index_state *istate,
> +				   struct tree_entry *ent)
> +{
> +	struct cache_entry *ce;
> +	struct strbuf ce_name = STRBUF_INIT;
> +	strbuf_add(&ce_name, ent->name, ent->len);
> +

Perhaps swap the first statement (which is strbuf_add()) and the
blank line that ought to separate the decls and the first statement?

> +	ce = make_cache_entry(istate, ent->mode, &ent->oid, ent->name, 0, 0);
> +	if (!ce)
> +		return error(_("make_cache_entry failed for path '%s'"), ent->name);
> +
> +	add_index_entry(istate, ce, ADD_CACHE_JUST_APPEND);
> +	strbuf_release(&ce_name);
> +	return 0;
> +}

This is only to append; presumably the caller drives this function
out of a sorted list.

>  static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
>  {
> +	struct index_state istate = INDEX_STATE_INIT(the_repository);
> +	istate.sparse_index = 1;
>  
>  	sort_and_dedup_tree_entry_array(arr);
>  
> +	/* Construct an in-memory index from the provided entries */
>  	for (size_t i = 0; i < arr->nr; i++) {
>  		struct tree_entry *ent = arr->entries[i];
> +
> +		if (add_tree_entry_to_index(&istate, ent))
> +			die(_("failed to add tree entry '%s'"), ent->name);
>  	}
> +	/* Write out new tree */
> +	if (cache_tree_update(&istate, WRITE_TREE_SILENT | WRITE_TREE_MISSING_OK))
> +		die(_("failed to write tree"));

Hmph.  Are we doing any run-time verification of what we produce
(e.g., if sort_and_dedup_tree_entry_array() fails to dedup or sort
correctly due to a bug or two, would cache_tree_update() notice that
the in-core index array is fishy)?  I am not suggesting to add an
unconditional "we appended to the index, so we should sort the
entries in it" step before cache_tree_update() call.  It is the
opposite---if we have extra checks in cache_tree_udpate() to slow us
down and if we are confident that the loop that added tree entries
to the index is correct, if we can bypass such checks.

> +	oidcpy(oid, &istate.cache_tree->oid);
> +
> +	release_index(&istate);
>  }

This is the gem of the whole series.  Clever.

What is so satisfying is that it takes not that much of code to
replace the "here is a flat buffer of what the contents of a single
tree object ought to look like" with "let's build in-core index and
write it out just like write-tree would".  Nice.
diff mbox series

Patch

diff --git a/builtin/mktree.c b/builtin/mktree.c
index a91d3a7b028..3ce8d3dc524 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -4,6 +4,7 @@ 
  * Copyright (c) Junio C Hamano, 2006, 2009
  */
 #include "builtin.h"
+#include "cache-tree.h"
 #include "gettext.h"
 #include "hex.h"
 #include "index-info.h"
@@ -24,6 +25,11 @@  struct tree_entry {
 	char name[FLEX_ARRAY];
 };
 
+static inline size_t df_path_len(size_t pathlen, unsigned int mode)
+{
+	return S_ISDIR(mode) ? pathlen - 1 : pathlen;
+}
+
 struct tree_entry_array {
 	size_t nr, alloc;
 	struct tree_entry **entries;
@@ -60,17 +66,25 @@  static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
 	if (literally) {
 		FLEX_ALLOC_MEM(ent, name, path, len);
 	} else {
+		size_t len_to_copy = len;
+
 		/* Normalize and validate entry path */
 		if (S_ISDIR(mode)) {
-			while(len > 0 && is_dir_sep(path[len - 1]))
-				len--;
+			while(len_to_copy > 0 && is_dir_sep(path[len_to_copy - 1]))
+				len_to_copy--;
+			len = len_to_copy + 1; /* add space for trailing slash */
 		}
-		FLEX_ALLOC_MEM(ent, name, path, len);
+		ent = xcalloc(1, st_add3(sizeof(struct tree_entry), len, 1));
+		memcpy(ent->name, path, len_to_copy);
 
 		if (!verify_path(ent->name, mode))
 			die(_("invalid path '%s'"), path);
 		if (strchr(ent->name, '/'))
 			die("path %s contains slash", path);
+
+		/* Add trailing slash to dir */
+		if (S_ISDIR(mode))
+			ent->name[len - 1] = '/';
 	}
 
 	ent->mode = mode;
@@ -88,11 +102,14 @@  static int ent_compare(const void *a_, const void *b_, void *ctx)
 	struct tree_entry *b = *(struct tree_entry **)b_;
 	int ignore_mode = *((int *)ctx);
 
-	if (ignore_mode)
-		cmp = name_compare(a->name, a->len, b->name, b->len);
-	else
-		cmp = base_name_compare(a->name, a->len, a->mode,
-					b->name, b->len, b->mode);
+	size_t a_len = a->len, b_len = b->len;
+
+	if (ignore_mode) {
+		a_len = df_path_len(a_len, a->mode);
+		b_len = df_path_len(b_len, b->mode);
+	}
+
+	cmp = name_compare(a->name, a_len, b->name, b_len);
 	return cmp ? cmp : b->order - a->order;
 }
 
@@ -108,8 +125,8 @@  static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
 	for (size_t i = 0; i < count; i++) {
 		struct tree_entry *curr = arr->entries[i];
 		if (prev &&
-		    !name_compare(prev->name, prev->len,
-				  curr->name, curr->len)) {
+		    !name_compare(prev->name, df_path_len(prev->len, prev->mode),
+				  curr->name, df_path_len(curr->len, curr->mode))) {
 			FREE_AND_NULL(curr);
 		} else {
 			arr->entries[arr->nr++] = curr;
@@ -122,24 +139,43 @@  static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
 	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
 }
 
+static int add_tree_entry_to_index(struct index_state *istate,
+				   struct tree_entry *ent)
+{
+	struct cache_entry *ce;
+	struct strbuf ce_name = STRBUF_INIT;
+	strbuf_add(&ce_name, ent->name, ent->len);
+
+	ce = make_cache_entry(istate, ent->mode, &ent->oid, ent->name, 0, 0);
+	if (!ce)
+		return error(_("make_cache_entry failed for path '%s'"), ent->name);
+
+	add_index_entry(istate, ce, ADD_CACHE_JUST_APPEND);
+	strbuf_release(&ce_name);
+	return 0;
+}
+
 static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
 {
-	struct strbuf buf;
-	size_t size = 0;
+	struct index_state istate = INDEX_STATE_INIT(the_repository);
+	istate.sparse_index = 1;
 
 	sort_and_dedup_tree_entry_array(arr);
-	for (size_t i = 0; i < arr->nr; i++)
-		size += 32 + arr->entries[i]->len;
 
-	strbuf_init(&buf, size);
+	/* Construct an in-memory index from the provided entries */
 	for (size_t i = 0; i < arr->nr; i++) {
 		struct tree_entry *ent = arr->entries[i];
-		strbuf_addf(&buf, "%o %s%c", ent->mode, ent->name, '\0');
-		strbuf_add(&buf, ent->oid.hash, the_hash_algo->rawsz);
+
+		if (add_tree_entry_to_index(&istate, ent))
+			die(_("failed to add tree entry '%s'"), ent->name);
 	}
 
-	write_object_file(buf.buf, buf.len, OBJ_TREE, oid);
-	strbuf_release(&buf);
+	/* Write out new tree */
+	if (cache_tree_update(&istate, WRITE_TREE_SILENT | WRITE_TREE_MISSING_OK))
+		die(_("failed to write tree"));
+	oidcpy(oid, &istate.cache_tree->oid);
+
+	release_index(&istate);
 }
 
 static void write_tree_literally(struct tree_entry_array *arr,