diff mbox series

[3/5] fsck: don't hard die on invalid object types

Message ID patch-3.6-74654a01ba3-20210328T025618Z-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series fsck: improve error reporting | expand

Commit Message

Ævar Arnfjörð Bjarmason March 28, 2021, 2:58 a.m. UTC
Change builtin/fsck.c to pass down a
OBJECT_INFO_ALLOW_UNKNOWN_TYPE. This changes this very ungraceful
error:

    $ git hash-object --stdin -w -t garbage --literally </dev/null
    <OID>
    $ git fsck
    fatal: invalid object type
    $

Into:

    $ git fsck
    error: hash mismatch for <OID_PATH> (expected <OID>)
    error: <OID>: object corrupt or missing: <OID_PATH>
    [ the rest of the fsck output here, i.e. it didn't hard die ]

We'll still exit with non-zero, but now we'll finish the rest of the
traversal. The tests that's being added here asserts that we'll still
complain about other fsck issues (e.g. an unrelated dangling blob).

But why are we complaining about a "hash mismatch" for an object of a
type we don't know about? We shouldn't. This is the bare minimal
change needed to not make fsck hard die on a repository that's been
corrupted in this manner. In subsequent commits we'll teach fsck to
recognize this particular type of corruption and emit a better error
message.

The parse_loose_header() function being changed here is only used in
builtin/fsck.c, see f6371f92104 (sha1_file: add read_loose_object()
function, 2017-01-13) for its introduction.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/fsck.c  |  3 ++-
 object-file.c   | 24 ++++++++++--------------
 object-store.h  |  6 ++++--
 streaming.c     |  5 ++++-
 t/t1450-fsck.sh |  9 +++++++++
 5 files changed, 29 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 821e7798c70..d92c530863d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -602,7 +602,8 @@  static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 	void *contents;
 	int eaten;
 
-	if (read_loose_object(path, oid, &type, &size, &contents) < 0) {
+	if (read_loose_object(path, oid, &type, &size, &contents,
+			      OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) {
 		errors_found |= ERROR_OBJECT;
 		error(_("%s: object corrupt or missing: %s"),
 		      oid_to_hex(oid), path);
diff --git a/object-file.c b/object-file.c
index 624af408cdc..26560a6281c 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1294,8 +1294,9 @@  static void *unpack_loose_rest(git_zstream *stream,
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
-				       unsigned int flags)
+int parse_loose_header(const char *hdr,
+		       struct object_info *oi,
+		       unsigned int flags)
 {
 	const char *type_buf = hdr;
 	unsigned long size;
@@ -1355,14 +1356,6 @@  static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
 	return *hdr ? -1 : type;
 }
 
-int parse_loose_header(const char *hdr, unsigned long *sizep)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-
-	oi.sizep = sizep;
-	return parse_loose_header_extended(hdr, &oi, 0);
-}
-
 static int loose_object_info(struct repository *r,
 			     const struct object_id *oid,
 			     struct object_info *oi, int flags)
@@ -1417,10 +1410,10 @@  static int loose_object_info(struct repository *r,
 	if (status < 0)
 		; /* Do nothing */
 	else if (hdrbuf.len) {
-		if ((status = parse_loose_header_extended(hdrbuf.buf, oi, flags)) < 0)
+		if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0)
 			status = error(_("unable to parse %s header with --allow-unknown-type"),
 				       oid_to_hex(oid));
-	} else if ((status = parse_loose_header_extended(hdr, oi, flags)) < 0)
+	} else if ((status = parse_loose_header(hdr, oi, flags)) < 0)
 		status = error(_("unable to parse %s header"), oid_to_hex(oid));
 
 	if (status >= 0 && oi->contentp) {
@@ -2497,13 +2490,16 @@  int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
 		      enum object_type *type,
 		      unsigned long *size,
-		      void **contents)
+		      void **contents,
+		      unsigned int oi_flags)
 {
 	int ret = -1;
 	void *map = NULL;
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
+	struct object_info oi = OBJECT_INFO_INIT;
+	oi.sizep = size;
 
 	*contents = NULL;
 
@@ -2518,7 +2514,7 @@  int read_loose_object(const char *path,
 		goto out;
 	}
 
-	*type = parse_loose_header(hdr, size);
+	*type = parse_loose_header(hdr, &oi, oi_flags);
 	if (*type < 0) {
 		error(_("unable to parse header of %s"), path);
 		git_inflate_end(&stream);
diff --git a/object-store.h b/object-store.h
index 9117115a50c..ab86c8bf32c 100644
--- a/object-store.h
+++ b/object-store.h
@@ -245,7 +245,8 @@  int read_loose_object(const char *path,
 		      const struct object_id *expected_oid,
 		      enum object_type *type,
 		      unsigned long *size,
-		      void **contents);
+		      void **contents,
+		      unsigned int oi_flags);
 
 /* Retry packed storage after checking packed and loose storage */
 #define HAS_OBJECT_RECHECK_PACKED 1
@@ -480,7 +481,8 @@  int for_each_packed_object(each_packed_object_fn, void *,
 int unpack_loose_header(git_zstream *stream, unsigned char *map,
 			unsigned long mapsize, void *buffer,
 			unsigned long bufsiz);
-int parse_loose_header(const char *hdr, unsigned long *sizep);
+int parse_loose_header(const char *hdr, struct object_info *oi,
+		       unsigned int flags);
 int check_object_signature(struct repository *r, const struct object_id *oid,
 			   void *buf, unsigned long size, const char *type);
 int finalize_object_file(const char *tmpfile, const char *filename);
diff --git a/streaming.c b/streaming.c
index 800f07a52cc..e5d4dd2f654 100644
--- a/streaming.c
+++ b/streaming.c
@@ -341,6 +341,9 @@  static struct stream_vtbl loose_vtbl = {
 
 static open_method_decl(loose)
 {
+	struct object_info oi2 = OBJECT_INFO_INIT;
+	oi2.sizep = &st->size;
+
 	st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize);
 	if (!st->u.loose.mapped)
 		return -1;
@@ -349,7 +352,7 @@  static open_method_decl(loose)
 				 st->u.loose.mapsize,
 				 st->u.loose.hdr,
 				 sizeof(st->u.loose.hdr)) < 0) ||
-	    (parse_loose_header(st->u.loose.hdr, &st->size) < 0)) {
+	    (parse_loose_header(st->u.loose.hdr, &oi2, 0) < 0)) {
 		git_inflate_end(&st->z);
 		munmap(st->u.loose.mapped, st->u.loose.mapsize);
 		return -1;
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 1563b35f88c..025dd1b491a 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -863,4 +863,13 @@  test_expect_success 'detect corrupt index file in fsck' '
 	test_i18ngrep "bad index file" errors
 '
 
+test_expect_success 'fsck error and recovery on invalid object type' '
+	test_create_repo garbage-type &&
+	empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob </dev/null) &&
+	garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally </dev/null) &&
+	test_must_fail git -C garbage-type fsck >out 2>err &&
+	grep "$garbage_blob: object corrupt or missing:" err &&
+	grep "dangling blob $empty_blob" out
+'
+
 test_done