diff mbox series

[1/3] cache-tree: refactor verification to return error codes

Message ID 1f13bc0e3259ea9b76f1417303a8ef063f3a7cbe.1726556195.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series cache-tree: fix segfaults with invalid cache-trees | expand

Commit Message

Patrick Steinhardt Sept. 17, 2024, 7:13 a.m. UTC
The function `cache_tree_verify()` will `BUG()` whenever it finds that
the cache-tree extension of the index is corrupt. The function is thus
inherently untestable because the resulting call to `abort()` will be
detected by our testing framework and labelled an error. And rightfully
so: it shouldn't ever be possible to hit bugs, as they should indicate a
programming error rather than corruption of on-disk state.

Refactor the function to instead return error codes. This also ensures
that the function can be used e.g. by git-fsck(1) without the whole
process dying.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 cache-tree.c   | 97 +++++++++++++++++++++++++++++++++++---------------
 cache-tree.h   |  2 +-
 read-cache.c   |  5 +--
 unpack-trees.c | 10 ++++--
 4 files changed, 79 insertions(+), 35 deletions(-)

Comments

Eric Sunshine Sept. 17, 2024, 5:05 p.m. UTC | #1
On Tue, Sep 17, 2024 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote:
> The function `cache_tree_verify()` will `BUG()` whenever it finds that
> the cache-tree extension of the index is corrupt. The function is thus
> inherently untestable because the resulting call to `abort()` will be
> detected by our testing framework and labelled an error. And rightfully
> so: it shouldn't ever be possible to hit bugs, as they should indicate a
> programming error rather than corruption of on-disk state.
>
> Refactor the function to instead return error codes. This also ensures
> that the function can be used e.g. by git-fsck(1) without the whole
> process dying.

Makes sense.

> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> diff --git a/cache-tree.c b/cache-tree.c
> @@ -890,18 +892,23 @@ static int verify_one(struct repository *r,
>         struct strbuf tree_buf = STRBUF_INIT;
>         for (i = 0; i < it->subtree_nr; i++) {
>                 strbuf_addf(path, "%s/", it->down[i]->name);
> -               if (verify_one(r, istate, it->down[i]->cache_tree, path))
> -                       return 1;
> +               ret = verify_one(r, istate, it->down[i]->cache_tree, path);
> +               if (ret)
> +                       goto out;

Assuming I am understanding correctly that the original code was
leaking the strbuf by returning early, I was surprised that the commit
message didn't mention that the patch is also fixing the leak.
(Probably not worth a reroll, though.)
Patrick Steinhardt Sept. 18, 2024, 5:11 a.m. UTC | #2
On Tue, Sep 17, 2024 at 01:05:50PM -0400, Eric Sunshine wrote:
> On Tue, Sep 17, 2024 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote:
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> > diff --git a/cache-tree.c b/cache-tree.c
> > @@ -890,18 +892,23 @@ static int verify_one(struct repository *r,
> >         struct strbuf tree_buf = STRBUF_INIT;
> >         for (i = 0; i < it->subtree_nr; i++) {
> >                 strbuf_addf(path, "%s/", it->down[i]->name);
> > -               if (verify_one(r, istate, it->down[i]->cache_tree, path))
> > -                       return 1;
> > +               ret = verify_one(r, istate, it->down[i]->cache_tree, path);
> > +               if (ret)
> > +                       goto out;
> 
> Assuming I am understanding correctly that the original code was
> leaking the strbuf by returning early, I was surprised that the commit
> message didn't mention that the patch is also fixing the leak.
> (Probably not worth a reroll, though.)

It just wasn't my main motivation here, so I forgot to mention it. Added
it now though, so it's included in case I'll have to reroll. Thanks!

Patrick
diff mbox series

Patch

diff --git a/cache-tree.c b/cache-tree.c
index 50610c3f3cb..4228b6fad48 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -2,6 +2,7 @@ 
 
 #include "git-compat-util.h"
 #include "environment.h"
+#include "gettext.h"
 #include "hex.h"
 #include "lockfile.h"
 #include "tree.h"
@@ -864,15 +865,15 @@  int cache_tree_matches_traversal(struct cache_tree *root,
 	return 0;
 }
 
-static void verify_one_sparse(struct index_state *istate,
-			      struct strbuf *path,
-			      int pos)
+static int verify_one_sparse(struct index_state *istate,
+			     struct strbuf *path,
+			     int pos)
 {
 	struct cache_entry *ce = istate->cache[pos];
-
 	if (!S_ISSPARSEDIR(ce->ce_mode))
-		BUG("directory '%s' is present in index, but not sparse",
-		    path->buf);
+		return error(_("directory '%s' is present in index, but not sparse"),
+			     path->buf);
+	return 0;
 }
 
 /*
@@ -881,6 +882,7 @@  static void verify_one_sparse(struct index_state *istate,
  *  1 - Restart verification - a call to ensure_full_index() freed the cache
  *      tree that is being verified and verification needs to be restarted from
  *      the new toplevel cache tree.
+ *  -1 - Verification failed.
  */
 static int verify_one(struct repository *r,
 		      struct index_state *istate,
@@ -890,18 +892,23 @@  static int verify_one(struct repository *r,
 	int i, pos, len = path->len;
 	struct strbuf tree_buf = STRBUF_INIT;
 	struct object_id new_oid;
+	int ret;
 
 	for (i = 0; i < it->subtree_nr; i++) {
 		strbuf_addf(path, "%s/", it->down[i]->name);
-		if (verify_one(r, istate, it->down[i]->cache_tree, path))
-			return 1;
+		ret = verify_one(r, istate, it->down[i]->cache_tree, path);
+		if (ret)
+			goto out;
+
 		strbuf_setlen(path, len);
 	}
 
 	if (it->entry_count < 0 ||
 	    /* no verification on tests (t7003) that replace trees */
-	    lookup_replace_object(r, &it->oid) != &it->oid)
-		return 0;
+	    lookup_replace_object(r, &it->oid) != &it->oid) {
+		ret = 0;
+		goto out;
+	}
 
 	if (path->len) {
 		/*
@@ -911,12 +918,14 @@  static int verify_one(struct repository *r,
 		 */
 		int is_sparse = istate->sparse_index;
 		pos = index_name_pos(istate, path->buf, path->len);
-		if (is_sparse && !istate->sparse_index)
-			return 1;
+		if (is_sparse && !istate->sparse_index) {
+			ret = 1;
+			goto out;
+		}
 
 		if (pos >= 0) {
-			verify_one_sparse(istate, path, pos);
-			return 0;
+			ret = verify_one_sparse(istate, path, pos);
+			goto out;
 		}
 
 		pos = -pos - 1;
@@ -934,16 +943,23 @@  static int verify_one(struct repository *r,
 		unsigned mode;
 		int entlen;
 
-		if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE))
-			BUG("%s with flags 0x%x should not be in cache-tree",
-			    ce->name, ce->ce_flags);
+		if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) {
+			ret = error(_("%s with flags 0x%x should not be in cache-tree"),
+				    ce->name, ce->ce_flags);
+			goto out;
+		}
+
 		name = ce->name + path->len;
 		slash = strchr(name, '/');
 		if (slash) {
 			entlen = slash - name;
+
 			sub = find_subtree(it, ce->name + path->len, entlen, 0);
-			if (!sub || sub->cache_tree->entry_count < 0)
-				BUG("bad subtree '%.*s'", entlen, name);
+			if (!sub || sub->cache_tree->entry_count < 0) {
+				ret = error(_("bad subtree '%.*s'"), entlen, name);
+				goto out;
+			}
+
 			oid = &sub->cache_tree->oid;
 			mode = S_IFDIR;
 			i += sub->cache_tree->entry_count;
@@ -956,27 +972,50 @@  static int verify_one(struct repository *r,
 		strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0');
 		strbuf_add(&tree_buf, oid->hash, r->hash_algo->rawsz);
 	}
+
 	hash_object_file(r->hash_algo, tree_buf.buf, tree_buf.len, OBJ_TREE,
 			 &new_oid);
-	if (!oideq(&new_oid, &it->oid))
-		BUG("cache-tree for path %.*s does not match. "
-		    "Expected %s got %s", len, path->buf,
-		    oid_to_hex(&new_oid), oid_to_hex(&it->oid));
+
+	if (!oideq(&new_oid, &it->oid)) {
+		ret = error(_("cache-tree for path %.*s does not match. "
+			      "Expected %s got %s"), len, path->buf,
+			    oid_to_hex(&new_oid), oid_to_hex(&it->oid));
+		goto out;
+	}
+
+	ret = 0;
+out:
 	strbuf_setlen(path, len);
 	strbuf_release(&tree_buf);
-	return 0;
+	return ret;
 }
 
-void cache_tree_verify(struct repository *r, struct index_state *istate)
+int cache_tree_verify(struct repository *r, struct index_state *istate)
 {
 	struct strbuf path = STRBUF_INIT;
+	int ret;
 
-	if (!istate->cache_tree)
-		return;
-	if (verify_one(r, istate, istate->cache_tree, &path)) {
+	if (!istate->cache_tree) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = verify_one(r, istate, istate->cache_tree, &path);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
 		strbuf_reset(&path);
-		if (verify_one(r, istate, istate->cache_tree, &path))
+
+		ret = verify_one(r, istate, istate->cache_tree, &path);
+		if (ret < 0)
+			goto out;
+		if (ret > 0)
 			BUG("ensure_full_index() called twice while verifying cache tree");
 	}
+
+	ret = 0;
+
+out:
 	strbuf_release(&path);
+	return ret;
 }
diff --git a/cache-tree.h b/cache-tree.h
index faae88be63c..b82c4963e7c 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -33,7 +33,7 @@  struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
 
 int cache_tree_fully_valid(struct cache_tree *);
 int cache_tree_update(struct index_state *, int);
-void cache_tree_verify(struct repository *, struct index_state *);
+int cache_tree_verify(struct repository *, struct index_state *);
 
 /* bitmasks to write_index_as_tree flags */
 #define WRITE_TREE_MISSING_OK 1
diff --git a/read-cache.c b/read-cache.c
index 4e67dc182e7..d72a3266b6f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3331,8 +3331,9 @@  int write_locked_index(struct index_state *istate, struct lock_file *lock,
 	int new_shared_index, ret, test_split_index_env;
 	struct split_index *si = istate->split_index;
 
-	if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
-		cache_tree_verify(the_repository, istate);
+	if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) &&
+	    cache_tree_verify(the_repository, istate) < 0)
+		return -1;
 
 	if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
 		if (flags & COMMIT_LOCK)
diff --git a/unpack-trees.c b/unpack-trees.c
index 9a55cb62040..21cc197d471 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -2070,9 +2070,13 @@  int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	if (o->dst_index) {
 		move_index_extensions(&o->internal.result, o->src_index);
 		if (!ret) {
-			if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
-				cache_tree_verify(the_repository,
-						  &o->internal.result);
+			if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) &&
+			    cache_tree_verify(the_repository,
+					      &o->internal.result) < 0) {
+				ret = -1;
+				goto done;
+			}
+
 			if (!o->skip_cache_tree_update &&
 			    !cache_tree_fully_valid(o->internal.result.cache_tree))
 				cache_tree_update(&o->internal.result,