diff mbox series

[v3,4/5] Always check `parse_tree*()`'s return value

Message ID 9e4dc94ef036882c3ce27208ca9fa545d018f199.1708612605.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series merge-tree: handle missing objects correctly | expand

Commit Message

Johannes Schindelin Feb. 22, 2024, 2:36 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

Otherwise we may easily run into serious crashes: For example, if we run
`init_tree_desc()` directly after a failed `parse_tree()`, we are
accessing uninitialized data or trying to dereference `NULL`.

Note that the `parse_tree()` function already takes care of showing an
error message. The `parse_tree_indirectly()` and
`repo_get_commit_tree()` functions do not, therefore those latter call
sites need to show a useful error message while the former do not.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/checkout.c   | 19 ++++++++++++++++---
 builtin/clone.c      |  3 ++-
 builtin/commit.c     |  3 ++-
 builtin/merge-tree.c |  6 ++++++
 builtin/read-tree.c  |  3 ++-
 builtin/reset.c      |  4 ++++
 cache-tree.c         |  4 ++--
 merge-ort.c          |  3 +++
 merge-recursive.c    |  3 ++-
 merge.c              |  5 ++++-
 reset.c              |  5 +++++
 sequencer.c          |  4 ++++
 12 files changed, 52 insertions(+), 10 deletions(-)

Comments

Junio C Hamano Feb. 22, 2024, 5:58 p.m. UTC | #1
"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
writes:

> @@ -707,7 +707,8 @@ static int reset_tree(struct tree *tree, const struct checkout_opts *o,
>  	init_checkout_metadata(&opts.meta, info->refname,
>  			       info->commit ? &info->commit->object.oid : null_oid(),
>  			       NULL);
> -	parse_tree(tree);
> +	if (parse_tree(tree) < 0)
> +		return 128;

The other error returned from this function is when unpack_trees()
fails well before the writeout phase and the value we return is also
128, so the caller is prepared to act on it.  OK.

> @@ -786,9 +787,15 @@ static int merge_working_tree(const struct checkout_opts *opts,
>  		if (new_branch_info->commit)
>  			BUG("'switch --orphan' should never accept a commit as starting point");
>  		new_tree = parse_tree_indirect(the_hash_algo->empty_tree);
> -	} else
> +		if (!new_tree)
> +			BUG("unable to read empty tree");

parse_tree() of the_hash_algo->empty_tree should result in a tree
object without having to even consult the object store, so BUG(),
not die(), is very much appropriate here.  OK.

> +	} else {
>  		new_tree = repo_get_commit_tree(the_repository,
>  						new_branch_info->commit);
> +		if (!new_tree)
> +			return error(_("unable to read tree %s"),
> +				     oid_to_hex(&new_branch_info->commit->object.oid));

We can help translators by enclosing %s inside a pair of parentheses.

    $ git grep -h 'msgid .*unable to read tree' po | sort | uniq -c
     18 msgid "unable to read tree (%s)"

FYI, the message with "(%s)" is shared by four places; there is one
instance of the message without parentheses added very recently that
forced .po files to have both entries.  We probably should unify them
to use the one with more existing users.

> @@ -823,7 +830,8 @@ static int merge_working_tree(const struct checkout_opts *opts,
>  				oid_to_hex(old_commit_oid));
>  
>  		init_tree_desc(&trees[0], tree->buffer, tree->size);
> -		parse_tree(new_tree);
> +		if (parse_tree(new_tree) < 0)
> +			exit(128);

There is another exit() in the same else clause this code is in, and
upon failing to unpack_trees(), that call exits with 128.  This
parse_tree() is about preparing the input for that call, so it makes
sense to exit with the same code.  Excellent.

> diff --git a/builtin/clone.c b/builtin/clone.c
> index c6357af9498..4410b55be98 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -736,7 +736,8 @@ static int checkout(int submodule_progress, int filter_submodules)
>  	tree = parse_tree_indirect(&oid);
>  	if (!tree)
>  		die(_("unable to parse commit %s"), oid_to_hex(&oid));
> -	parse_tree(tree);
> +	if (parse_tree(tree) < 0)
> +		exit(128);
>  	init_tree_desc(&t, tree->buffer, tree->size);
>  	if (unpack_trees(1, &t, &opts) < 0)
>  		die(_("unable to checkout working tree"));

Exactly the same reasoning applies, as die() exits with 128.

We may want to "#define EXIT_DIE 128" and use it in appropriate
places to make such a reasoning/review easier (possibly an entry for
#leftoverbits)?

> diff --git a/builtin/commit.c b/builtin/commit.c
> index 781af2e206c..0723f06de7a 100644
> --- a/builtin/commit.c
> +++ b/builtin/commit.c
> @@ -339,7 +339,8 @@ static void create_base_index(const struct commit *current_head)
>  	tree = parse_tree_indirect(&current_head->object.oid);
>  	if (!tree)
>  		die(_("failed to unpack HEAD tree object"));
> -	parse_tree(tree);
> +	if (parse_tree(tree) < 0)
> +		exit(128);
>  	init_tree_desc(&t, tree->buffer, tree->size);
>  	if (unpack_trees(1, &t, &opts))
>  		exit(128); /* We've already reported the error, finish dying */

Ditto.

> diff --git a/builtin/read-tree.c b/builtin/read-tree.c
> index 8196ca9dd85..5923ed36893 100644
> --- a/builtin/read-tree.c
> +++ b/builtin/read-tree.c
> @@ -263,7 +263,8 @@ int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix)
>  	cache_tree_free(&the_index.cache_tree);
>  	for (i = 0; i < nr_trees; i++) {
>  		struct tree *tree = trees[i];
> -		parse_tree(tree);
> +		if (parse_tree(tree) < 0)
> +			return 128;
>  		init_tree_desc(t+i, tree->buffer, tree->size);
>  	}
>  	if (unpack_trees(nr_trees, t, &opts))

Ditto.  After the post-context we also return 128.

> diff --git a/builtin/reset.c b/builtin/reset.c
> index 4b018d20e3b..f030f57f4e9 100644
> --- a/builtin/reset.c
> +++ b/builtin/reset.c
> @@ -119,6 +119,10 @@ static int reset_index(const char *ref, const struct object_id *oid, int reset_t
>  
>  	if (reset_type == MIXED || reset_type == HARD) {
>  		tree = parse_tree_indirect(oid);
> +		if (!tree) {
> +			error(_("unable to read tree %s"), oid_to_hex(oid));
> +			goto out;
> +		}
>  		prime_cache_tree(the_repository, the_repository->index, tree);
>  	}

Good.

> diff --git a/cache-tree.c b/cache-tree.c
> index 641427ed410..c6508b64a5c 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -779,8 +779,8 @@ static void prime_cache_tree_rec(struct repository *r,
>  			struct cache_tree_sub *sub;
>  			struct tree *subtree = lookup_tree(r, &entry.oid);
>  
> -			if (!subtree->object.parsed)
> -				parse_tree(subtree);
> +			if (!subtree->object.parsed && parse_tree(subtree) < 0)
> +				exit(128);
>  			sub = cache_tree_sub(it, entry.path);
>  			sub->cache_tree = cache_tree();

The cache_tree() used to be just an optimization mechanism, but
there is no other way than fully populating it to write a tree
object out of the index, so dying here is the only sensible thing to
do upon unparseable subtree.  Otherwise we would end up silently
writing a bogus result.  Good.

> diff --git a/merge-recursive.c b/merge-recursive.c
> index e3beb0801b1..10d41bfd487 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -410,7 +410,8 @@ static inline int merge_detect_rename(struct merge_options *opt)
>  
>  static void init_tree_desc_from_tree(struct tree_desc *desc, struct tree *tree)
>  {
> -	parse_tree(tree);
> +	if (parse_tree(tree) < 0)
> +		exit(128);
>  	init_tree_desc(desc, tree->buffer, tree->size);
>  }

OK.

> diff --git a/merge.c b/merge.c
> index b60925459c2..14a7325859d 100644
> --- a/merge.c
> +++ b/merge.c
> @@ -80,7 +80,10 @@ int checkout_fast_forward(struct repository *r,
>  		return -1;
>  	}
>  	for (i = 0; i < nr_trees; i++) {
> -		parse_tree(trees[i]);
> +		if (parse_tree(trees[i]) < 0) {
> +			rollback_lock_file(&lock_file);
> +			return -1;
> +		}
>  		init_tree_desc(t+i, trees[i]->buffer, trees[i]->size);
>  	}

This handles the error in the same way as the other case earlier
where any of the tree-ish failed to load.  OK.

> diff --git a/reset.c b/reset.c
> index 48da0adf851..a93fdbc12e3 100644
> --- a/reset.c
> +++ b/reset.c
> @@ -158,6 +158,11 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts)
>  	}
>  
>  	tree = parse_tree_indirect(oid);
> +	if (!tree) {
> +		ret = error(_("unable to read tree %s"), oid_to_hex(oid));
> +		goto leave_reset_head;
> +	}

OK, but the _("unable to read tree (%s)") comment applies here, too.

> diff --git a/sequencer.c b/sequencer.c
> index d584cac8ed9..407473bab28 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -715,6 +715,8 @@ static int do_recursive_merge(struct repository *r,
>  	o.show_rename_progress = 1;
>  
>  	head_tree = parse_tree_indirect(head);
> +	if (!head_tree)
> +		return error(_("unable to read tree %s"), oid_to_hex(head));
>  	next_tree = next ? repo_get_commit_tree(r, next) : empty_tree(r);
>  	base_tree = base ? repo_get_commit_tree(r, base) : empty_tree(r);

Ditto.

> @@ -3887,6 +3889,8 @@ static int do_reset(struct repository *r,
>  	}
>  
>  	tree = parse_tree_indirect(&oid);
> +	if (!tree)
> +		return error(_("unable to read tree %s"), oid_to_hex(&oid));

Ditto.

>  	prime_cache_tree(r, r->index, tree);
>  
>  	if (write_locked_index(r->index, &lock, COMMIT_LOCK) < 0)

Looking good, modulo the additional translator burden.

Thanks.
Johannes Schindelin Feb. 23, 2024, 8:33 a.m. UTC | #2
Hi Junio,

thank you so much for reviewing!

On Thu, 22 Feb 2024, Junio C Hamano wrote:

> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
>
> > +	} else {
> >  		new_tree = repo_get_commit_tree(the_repository,
> >  						new_branch_info->commit);
> > +		if (!new_tree)
> > +			return error(_("unable to read tree %s"),
> > +				     oid_to_hex(&new_branch_info->commit->object.oid));
>
> We can help translators by enclosing %s inside a pair of parentheses.
>
>     $ git grep -h 'msgid .*unable to read tree' po | sort | uniq -c
>      18 msgid "unable to read tree (%s)"

Right. I had used

  $ git grep 'unable to read tree .*%s' | sed -n 's/.*_("\([^"]*\).*/\1/p' | sort | uniq -c
       11 unable to read tree %s
        3 unable to read tree (%s)

only to realize that the 11 were the ones I added.
Junio C Hamano Feb. 23, 2024, 5:17 p.m. UTC | #3
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Right. I had used
>
>   $ git grep 'unable to read tree .*%s' | sed -n 's/.*_("\([^"]*\).*/\1/p' | sort | uniq -c
>        11 unable to read tree %s
>         3 unable to read tree (%s)
>
> only to realize that the 11 were the ones I added.
diff mbox series

Patch

diff --git a/builtin/checkout.c b/builtin/checkout.c
index f02434bc155..84108ec3635 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -707,7 +707,8 @@  static int reset_tree(struct tree *tree, const struct checkout_opts *o,
 	init_checkout_metadata(&opts.meta, info->refname,
 			       info->commit ? &info->commit->object.oid : null_oid(),
 			       NULL);
-	parse_tree(tree);
+	if (parse_tree(tree) < 0)
+		return 128;
 	init_tree_desc(&tree_desc, tree->buffer, tree->size);
 	switch (unpack_trees(1, &tree_desc, &opts)) {
 	case -2:
@@ -786,9 +787,15 @@  static int merge_working_tree(const struct checkout_opts *opts,
 		if (new_branch_info->commit)
 			BUG("'switch --orphan' should never accept a commit as starting point");
 		new_tree = parse_tree_indirect(the_hash_algo->empty_tree);
-	} else
+		if (!new_tree)
+			BUG("unable to read empty tree");
+	} else {
 		new_tree = repo_get_commit_tree(the_repository,
 						new_branch_info->commit);
+		if (!new_tree)
+			return error(_("unable to read tree %s"),
+				     oid_to_hex(&new_branch_info->commit->object.oid));
+	}
 	if (opts->discard_changes) {
 		ret = reset_tree(new_tree, opts, 1, writeout_error, new_branch_info);
 		if (ret)
@@ -823,7 +830,8 @@  static int merge_working_tree(const struct checkout_opts *opts,
 				oid_to_hex(old_commit_oid));
 
 		init_tree_desc(&trees[0], tree->buffer, tree->size);
-		parse_tree(new_tree);
+		if (parse_tree(new_tree) < 0)
+			exit(128);
 		tree = new_tree;
 		init_tree_desc(&trees[1], tree->buffer, tree->size);
 
@@ -1239,10 +1247,15 @@  static void setup_new_branch_info_and_source_tree(
 	if (!new_branch_info->commit) {
 		/* not a commit */
 		*source_tree = parse_tree_indirect(rev);
+		if (!*source_tree)
+			die(_("unable to read tree %s"), oid_to_hex(rev));
 	} else {
 		parse_commit_or_die(new_branch_info->commit);
 		*source_tree = repo_get_commit_tree(the_repository,
 						    new_branch_info->commit);
+		if (!*source_tree)
+			die(_("unable to read tree %s"),
+			    oid_to_hex(&new_branch_info->commit->object.oid));
 	}
 }
 
diff --git a/builtin/clone.c b/builtin/clone.c
index c6357af9498..4410b55be98 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -736,7 +736,8 @@  static int checkout(int submodule_progress, int filter_submodules)
 	tree = parse_tree_indirect(&oid);
 	if (!tree)
 		die(_("unable to parse commit %s"), oid_to_hex(&oid));
-	parse_tree(tree);
+	if (parse_tree(tree) < 0)
+		exit(128);
 	init_tree_desc(&t, tree->buffer, tree->size);
 	if (unpack_trees(1, &t, &opts) < 0)
 		die(_("unable to checkout working tree"));
diff --git a/builtin/commit.c b/builtin/commit.c
index 781af2e206c..0723f06de7a 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -339,7 +339,8 @@  static void create_base_index(const struct commit *current_head)
 	tree = parse_tree_indirect(&current_head->object.oid);
 	if (!tree)
 		die(_("failed to unpack HEAD tree object"));
-	parse_tree(tree);
+	if (parse_tree(tree) < 0)
+		exit(128);
 	init_tree_desc(&t, tree->buffer, tree->size);
 	if (unpack_trees(1, &t, &opts))
 		exit(128); /* We've already reported the error, finish dying */
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index 2d4ce5b3886..ba84d00deee 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -447,12 +447,18 @@  static int real_merge(struct merge_tree_options *o,
 		if (repo_get_oid_treeish(the_repository, merge_base, &base_oid))
 			die(_("could not parse as tree '%s'"), merge_base);
 		base_tree = parse_tree_indirect(&base_oid);
+		if (!base_tree)
+			die(_("unable to read tree %s"), oid_to_hex(&base_oid));
 		if (repo_get_oid_treeish(the_repository, branch1, &head_oid))
 			die(_("could not parse as tree '%s'"), branch1);
 		parent1_tree = parse_tree_indirect(&head_oid);
+		if (!parent1_tree)
+			die(_("unable to read tree %s"), oid_to_hex(&head_oid));
 		if (repo_get_oid_treeish(the_repository, branch2, &merge_oid))
 			die(_("could not parse as tree '%s'"), branch2);
 		parent2_tree = parse_tree_indirect(&merge_oid);
+		if (!parent2_tree)
+			die(_("unable to read tree %s"), oid_to_hex(&merge_oid));
 
 		opt.ancestor = merge_base;
 		merge_incore_nonrecursive(&opt, base_tree, parent1_tree, parent2_tree, &result);
diff --git a/builtin/read-tree.c b/builtin/read-tree.c
index 8196ca9dd85..5923ed36893 100644
--- a/builtin/read-tree.c
+++ b/builtin/read-tree.c
@@ -263,7 +263,8 @@  int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix)
 	cache_tree_free(&the_index.cache_tree);
 	for (i = 0; i < nr_trees; i++) {
 		struct tree *tree = trees[i];
-		parse_tree(tree);
+		if (parse_tree(tree) < 0)
+			return 128;
 		init_tree_desc(t+i, tree->buffer, tree->size);
 	}
 	if (unpack_trees(nr_trees, t, &opts))
diff --git a/builtin/reset.c b/builtin/reset.c
index 4b018d20e3b..f030f57f4e9 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -119,6 +119,10 @@  static int reset_index(const char *ref, const struct object_id *oid, int reset_t
 
 	if (reset_type == MIXED || reset_type == HARD) {
 		tree = parse_tree_indirect(oid);
+		if (!tree) {
+			error(_("unable to read tree %s"), oid_to_hex(oid));
+			goto out;
+		}
 		prime_cache_tree(the_repository, the_repository->index, tree);
 	}
 
diff --git a/cache-tree.c b/cache-tree.c
index 641427ed410..c6508b64a5c 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -779,8 +779,8 @@  static void prime_cache_tree_rec(struct repository *r,
 			struct cache_tree_sub *sub;
 			struct tree *subtree = lookup_tree(r, &entry.oid);
 
-			if (!subtree->object.parsed)
-				parse_tree(subtree);
+			if (!subtree->object.parsed && parse_tree(subtree) < 0)
+				exit(128);
 			sub = cache_tree_sub(it, entry.path);
 			sub->cache_tree = cache_tree();
 
diff --git a/merge-ort.c b/merge-ort.c
index 79d9e18f63d..534ddaf16ba 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -4983,6 +4983,9 @@  static void merge_ort_nonrecursive_internal(struct merge_options *opt,
 
 	if (result->clean >= 0) {
 		result->tree = parse_tree_indirect(&working_tree_oid);
+		if (!result->tree)
+			die(_("unable to read tree %s"),
+			    oid_to_hex(&working_tree_oid));
 		/* existence of conflicted entries implies unclean */
 		result->clean &= strmap_empty(&opt->priv->conflicted);
 	}
diff --git a/merge-recursive.c b/merge-recursive.c
index e3beb0801b1..10d41bfd487 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -410,7 +410,8 @@  static inline int merge_detect_rename(struct merge_options *opt)
 
 static void init_tree_desc_from_tree(struct tree_desc *desc, struct tree *tree)
 {
-	parse_tree(tree);
+	if (parse_tree(tree) < 0)
+		exit(128);
 	init_tree_desc(desc, tree->buffer, tree->size);
 }
 
diff --git a/merge.c b/merge.c
index b60925459c2..14a7325859d 100644
--- a/merge.c
+++ b/merge.c
@@ -80,7 +80,10 @@  int checkout_fast_forward(struct repository *r,
 		return -1;
 	}
 	for (i = 0; i < nr_trees; i++) {
-		parse_tree(trees[i]);
+		if (parse_tree(trees[i]) < 0) {
+			rollback_lock_file(&lock_file);
+			return -1;
+		}
 		init_tree_desc(t+i, trees[i]->buffer, trees[i]->size);
 	}
 
diff --git a/reset.c b/reset.c
index 48da0adf851..a93fdbc12e3 100644
--- a/reset.c
+++ b/reset.c
@@ -158,6 +158,11 @@  int reset_head(struct repository *r, const struct reset_head_opts *opts)
 	}
 
 	tree = parse_tree_indirect(oid);
+	if (!tree) {
+		ret = error(_("unable to read tree %s"), oid_to_hex(oid));
+		goto leave_reset_head;
+	}
+
 	prime_cache_tree(r, r->index, tree);
 
 	if (write_locked_index(r->index, &lock, COMMIT_LOCK) < 0) {
diff --git a/sequencer.c b/sequencer.c
index d584cac8ed9..407473bab28 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -715,6 +715,8 @@  static int do_recursive_merge(struct repository *r,
 	o.show_rename_progress = 1;
 
 	head_tree = parse_tree_indirect(head);
+	if (!head_tree)
+		return error(_("unable to read tree %s"), oid_to_hex(head));
 	next_tree = next ? repo_get_commit_tree(r, next) : empty_tree(r);
 	base_tree = base ? repo_get_commit_tree(r, base) : empty_tree(r);
 
@@ -3887,6 +3889,8 @@  static int do_reset(struct repository *r,
 	}
 
 	tree = parse_tree_indirect(&oid);
+	if (!tree)
+		return error(_("unable to read tree %s"), oid_to_hex(&oid));
 	prime_cache_tree(r, r->index, tree);
 
 	if (write_locked_index(r->index, &lock, COMMIT_LOCK) < 0)