diff mbox series

[01/10] files-backend: add object check for regular ref

Message ID Z3qN1T3lJoj82ckl@ArchLinux (mailing list archive)
State New
Headers show
Series add more ref consistency checks | expand

Commit Message

shejialuo Jan. 5, 2025, 1:49 p.m. UTC
Although we use "parse_loose_ref_content" to check whether the object id
is correct, we never parse it into the "struct object" structure thus we
ignore checking whether there is a real object existing in the repo and
whether the object type is correct.

Use "parse_object" to parse the oid for the regular ref content. If the
object does not exist, report the error to the user by reusing the fsck
message "BAD_REF_CONTENT".

Then, we need to check the type of the object. Just like "git-fsck(1)",
we only report "not a commit" error when the ref is a branch. Last,
update the test to exercise the code.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
 refs/files-backend.c     | 50 ++++++++++++++++++++++++++++++++--------
 t/t0602-reffiles-fsck.sh | 30 ++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 10 deletions(-)

Comments

Karthik Nayak Jan. 7, 2025, 2:17 p.m. UTC | #1
shejialuo <shejialuo@gmail.com> writes:

> Although we use "parse_loose_ref_content" to check whether the object id
> is correct, we never parse it into the "struct object" structure thus we
> ignore checking whether there is a real object existing in the repo and
> whether the object type is correct.
>
> Use "parse_object" to parse the oid for the regular ref content. If the
> object does not exist, report the error to the user by reusing the fsck
> message "BAD_REF_CONTENT".
>
> Then, we need to check the type of the object. Just like "git-fsck(1)",
> we only report "not a commit" error when the ref is a branch. Last,
> update the test to exercise the code.

I found this a bit confusing at first, the code does clear up the
confusion. Perhaps we can say something like:

  Branches that do not point to a commit type are explicitly called out,
  similar to 'git-fsck(1)'.

>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
>  refs/files-backend.c     | 50 ++++++++++++++++++++++++++++++++--------
>  t/t0602-reffiles-fsck.sh | 30 ++++++++++++++++++++++++
>  2 files changed, 70 insertions(+), 10 deletions(-)
>
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index 64f51f0da9..0a4912c009 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -20,6 +20,7 @@
>  #include "../lockfile.h"
>  #include "../object.h"
>  #include "../object-file.h"
> +#include "../packfile.h"
>  #include "../path.h"
>  #include "../dir.h"
>  #include "../chdir-notify.h"
> @@ -3589,6 +3590,34 @@ static int files_fsck_symref_target(struct fsck_options *o,
>  	return ret;
>  }
>
> +static int files_fsck_refs_oid(struct fsck_options *o,
> +			       struct ref_store *ref_store,
> +			       struct fsck_ref_report report,
> +			       const char *target_name,
> +			       struct object_id *oid)
> +{
> +	struct object *obj;
> +	int ret = 0;
> +
> +	if (is_promisor_object(ref_store->repo, oid))
> +		return 0;
> +
> +	obj = parse_object(ref_store->repo, oid);
> +	if (!obj) {
> +		ret |= fsck_report_ref(o, &report,
> +				       FSCK_MSG_BAD_REF_CONTENT,
> +				       "points to non-existing object %s",
> +				       oid_to_hex(oid));

Nit: The two conditionals here are mutually exclusive. So we don't have
to do `ret |=`, no? We don't even need `ret` here, we could simply do a
`return fsck_report_ref(...)`.

> +	} else if (obj->type != OBJ_COMMIT && is_branch(target_name)) {
> +		ret |= fsck_report_ref(o, &report,
> +				       FSCK_MSG_BAD_REF_CONTENT,
> +				       "points to non-commit object %s",
> +				       oid_to_hex(oid));
> +	}

Since this is a single lined if/else, we can skip the braces here.

> +	return ret;
> +}
> +
>  static int files_fsck_refs_content(struct ref_store *ref_store,
>  				   struct fsck_options *o,
>  				   const char *target_name,
> @@ -3654,18 +3683,19 @@ static int files_fsck_refs_content(struct ref_store *ref_store,
>  	}
>
>  	if (!(type & REF_ISSYMREF)) {
> +		ret |= files_fsck_refs_oid(o, ref_store, report, target_name, &oid);
> +
>  		if (!*trailing) {
> -			ret = fsck_report_ref(o, &report,
> -					      FSCK_MSG_REF_MISSING_NEWLINE,
> -					      "misses LF at the end");
> -			goto cleanup;
> -		}
> -		if (*trailing != '\n' || *(trailing + 1)) {
> -			ret = fsck_report_ref(o, &report,
> -					      FSCK_MSG_TRAILING_REF_CONTENT,
> -					      "has trailing garbage: '%s'", trailing);
> -			goto cleanup;
> +			ret |= fsck_report_ref(o, &report,
> +					       FSCK_MSG_REF_MISSING_NEWLINE,
> +					       "misses LF at the end");
> +		} else if (*trailing != '\n' || *(trailing + 1)) {
> +			ret |= fsck_report_ref(o, &report,
> +					       FSCK_MSG_TRAILING_REF_CONTENT,
> +					       "has trailing garbage: '%s'", trailing);
>  		}
> +
> +		goto cleanup;
>  	} else {
>  		ret = files_fsck_symref_target(o, &report, &referent, 0);
>  		goto cleanup;
> diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
> index d4a08b823b..75f234a94a 100755
> --- a/t/t0602-reffiles-fsck.sh
> +++ b/t/t0602-reffiles-fsck.sh
> @@ -161,8 +161,10 @@ test_expect_success 'regular ref content should be checked (individual)' '
>  	test_when_finished "rm -rf repo" &&
>  	git init repo &&
>  	branch_dir_prefix=.git/refs/heads &&
> +	tag_dir_prefix=.git/refs/tags &&
>  	cd repo &&
>  	test_commit default &&
> +	git branch branch-1 &&
>  	mkdir -p "$branch_dir_prefix/a/b" &&
>
>  	git refs verify 2>err &&
> @@ -198,6 +200,28 @@ test_expect_success 'regular ref content should be checked (individual)' '
>  	rm $branch_dir_prefix/branch-no-newline &&
>  	test_cmp expect err &&
>
> +	for non_existing_oid in "$(test_oid 001)" "$(test_oid 002)"
> +	do
> +		printf "%s\n" $non_existing_oid >$branch_dir_prefix/invalid-commit &&
> +		test_must_fail git refs verify 2>err &&
> +		cat >expect <<-EOF &&
> +		error: refs/heads/invalid-commit: badRefContent: points to non-existing object $non_existing_oid
> +		EOF
> +		rm $branch_dir_prefix/invalid-commit &&
> +		test_cmp expect err || return 1
> +	done &&
> +
> +	for tree_oid in "$(git rev-parse main^{tree})" "$(git rev-parse branch-1^{tree})"
> +	do
> +		printf "%s\n" $tree_oid >$branch_dir_prefix/branch-tree &&
> +		test_must_fail git refs verify 2>err &&
> +		cat >expect <<-EOF &&
> +		error: refs/heads/branch-tree: badRefContent: points to non-commit object $tree_oid

Reading this error here, I think it would be nicer to say
'badRefContent: branch points to ....' so we know that the specified ref
is a branch.

> +		EOF
> +		rm $branch_dir_prefix/branch-tree &&
> +		test_cmp expect err || return 1
> +	done &&
> +
>  	for trailing_content in " garbage" "    more garbage"
>  	do
>  		printf "%s" "$(git rev-parse main)$trailing_content" >$branch_dir_prefix/branch-garbage &&
> @@ -244,15 +268,21 @@ test_expect_success 'regular ref content should be checked (aggregate)' '
>  	bad_content_1=$(git rev-parse main)x &&
>  	bad_content_2=xfsazqfxcadas &&
>  	bad_content_3=Xfsazqfxcadas &&
> +	non_existing_oid=$(test_oid 001) &&
> +	tree_oid=$(git rev-parse main^{tree}) &&
>  	printf "%s" $bad_content_1 >$tag_dir_prefix/tag-bad-1 &&
>  	printf "%s" $bad_content_2 >$tag_dir_prefix/tag-bad-2 &&
>  	printf "%s" $bad_content_3 >$branch_dir_prefix/a/b/branch-bad &&
>  	printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline &&
>  	printf "%s garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage &&
> +	printf "%s\n" $non_existing_oid >$branch_dir_prefix/branch-non-existing-oid &&
> +	printf "%s\n" $tree_oid >$branch_dir_prefix/branch-tree &&
>
>  	test_must_fail git refs verify 2>err &&
>  	cat >expect <<-EOF &&
>  	error: refs/heads/a/b/branch-bad: badRefContent: $bad_content_3
> +	error: refs/heads/branch-non-existing-oid: badRefContent: points to non-existing object $non_existing_oid
> +	error: refs/heads/branch-tree: badRefContent: points to non-commit object $tree_oid
>  	error: refs/tags/tag-bad-1: badRefContent: $bad_content_1
>  	error: refs/tags/tag-bad-2: badRefContent: $bad_content_2
>  	warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\''
> --
> 2.47.1
diff mbox series

Patch

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 64f51f0da9..0a4912c009 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -20,6 +20,7 @@ 
 #include "../lockfile.h"
 #include "../object.h"
 #include "../object-file.h"
+#include "../packfile.h"
 #include "../path.h"
 #include "../dir.h"
 #include "../chdir-notify.h"
@@ -3589,6 +3590,34 @@  static int files_fsck_symref_target(struct fsck_options *o,
 	return ret;
 }
 
+static int files_fsck_refs_oid(struct fsck_options *o,
+			       struct ref_store *ref_store,
+			       struct fsck_ref_report report,
+			       const char *target_name,
+			       struct object_id *oid)
+{
+	struct object *obj;
+	int ret = 0;
+
+	if (is_promisor_object(ref_store->repo, oid))
+		return 0;
+
+	obj = parse_object(ref_store->repo, oid);
+	if (!obj) {
+		ret |= fsck_report_ref(o, &report,
+				       FSCK_MSG_BAD_REF_CONTENT,
+				       "points to non-existing object %s",
+				       oid_to_hex(oid));
+	} else if (obj->type != OBJ_COMMIT && is_branch(target_name)) {
+		ret |= fsck_report_ref(o, &report,
+				       FSCK_MSG_BAD_REF_CONTENT,
+				       "points to non-commit object %s",
+				       oid_to_hex(oid));
+	}
+
+	return ret;
+}
+
 static int files_fsck_refs_content(struct ref_store *ref_store,
 				   struct fsck_options *o,
 				   const char *target_name,
@@ -3654,18 +3683,19 @@  static int files_fsck_refs_content(struct ref_store *ref_store,
 	}
 
 	if (!(type & REF_ISSYMREF)) {
+		ret |= files_fsck_refs_oid(o, ref_store, report, target_name, &oid);
+
 		if (!*trailing) {
-			ret = fsck_report_ref(o, &report,
-					      FSCK_MSG_REF_MISSING_NEWLINE,
-					      "misses LF at the end");
-			goto cleanup;
-		}
-		if (*trailing != '\n' || *(trailing + 1)) {
-			ret = fsck_report_ref(o, &report,
-					      FSCK_MSG_TRAILING_REF_CONTENT,
-					      "has trailing garbage: '%s'", trailing);
-			goto cleanup;
+			ret |= fsck_report_ref(o, &report,
+					       FSCK_MSG_REF_MISSING_NEWLINE,
+					       "misses LF at the end");
+		} else if (*trailing != '\n' || *(trailing + 1)) {
+			ret |= fsck_report_ref(o, &report,
+					       FSCK_MSG_TRAILING_REF_CONTENT,
+					       "has trailing garbage: '%s'", trailing);
 		}
+
+		goto cleanup;
 	} else {
 		ret = files_fsck_symref_target(o, &report, &referent, 0);
 		goto cleanup;
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index d4a08b823b..75f234a94a 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -161,8 +161,10 @@  test_expect_success 'regular ref content should be checked (individual)' '
 	test_when_finished "rm -rf repo" &&
 	git init repo &&
 	branch_dir_prefix=.git/refs/heads &&
+	tag_dir_prefix=.git/refs/tags &&
 	cd repo &&
 	test_commit default &&
+	git branch branch-1 &&
 	mkdir -p "$branch_dir_prefix/a/b" &&
 
 	git refs verify 2>err &&
@@ -198,6 +200,28 @@  test_expect_success 'regular ref content should be checked (individual)' '
 	rm $branch_dir_prefix/branch-no-newline &&
 	test_cmp expect err &&
 
+	for non_existing_oid in "$(test_oid 001)" "$(test_oid 002)"
+	do
+		printf "%s\n" $non_existing_oid >$branch_dir_prefix/invalid-commit &&
+		test_must_fail git refs verify 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/invalid-commit: badRefContent: points to non-existing object $non_existing_oid
+		EOF
+		rm $branch_dir_prefix/invalid-commit &&
+		test_cmp expect err || return 1
+	done &&
+
+	for tree_oid in "$(git rev-parse main^{tree})" "$(git rev-parse branch-1^{tree})"
+	do
+		printf "%s\n" $tree_oid >$branch_dir_prefix/branch-tree &&
+		test_must_fail git refs verify 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/branch-tree: badRefContent: points to non-commit object $tree_oid
+		EOF
+		rm $branch_dir_prefix/branch-tree &&
+		test_cmp expect err || return 1
+	done &&
+
 	for trailing_content in " garbage" "    more garbage"
 	do
 		printf "%s" "$(git rev-parse main)$trailing_content" >$branch_dir_prefix/branch-garbage &&
@@ -244,15 +268,21 @@  test_expect_success 'regular ref content should be checked (aggregate)' '
 	bad_content_1=$(git rev-parse main)x &&
 	bad_content_2=xfsazqfxcadas &&
 	bad_content_3=Xfsazqfxcadas &&
+	non_existing_oid=$(test_oid 001) &&
+	tree_oid=$(git rev-parse main^{tree}) &&
 	printf "%s" $bad_content_1 >$tag_dir_prefix/tag-bad-1 &&
 	printf "%s" $bad_content_2 >$tag_dir_prefix/tag-bad-2 &&
 	printf "%s" $bad_content_3 >$branch_dir_prefix/a/b/branch-bad &&
 	printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline &&
 	printf "%s garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage &&
+	printf "%s\n" $non_existing_oid >$branch_dir_prefix/branch-non-existing-oid &&
+	printf "%s\n" $tree_oid >$branch_dir_prefix/branch-tree &&
 
 	test_must_fail git refs verify 2>err &&
 	cat >expect <<-EOF &&
 	error: refs/heads/a/b/branch-bad: badRefContent: $bad_content_3
+	error: refs/heads/branch-non-existing-oid: badRefContent: points to non-existing object $non_existing_oid
+	error: refs/heads/branch-tree: badRefContent: points to non-commit object $tree_oid
 	error: refs/tags/tag-bad-1: badRefContent: $bad_content_1
 	error: refs/tags/tag-bad-2: badRefContent: $bad_content_2
 	warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\''