@@ -602,7 +602,8 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data)
void *contents;
int eaten;
- if (read_loose_object(path, oid, &type, &size, &contents) < 0) {
+ if (read_loose_object(path, oid, &type, &size, &contents,
+ OBJECT_INFO_ALLOW_UNKNOWN_TYPE) < 0) {
errors_found |= ERROR_OBJECT;
error(_("%s: object corrupt or missing: %s"),
oid_to_hex(oid), path);
@@ -1294,8 +1294,9 @@ static void *unpack_loose_rest(git_zstream *stream,
* too permissive for what we want to check. So do an anal
* object header parse by hand.
*/
-static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
- unsigned int flags)
+int parse_loose_header(const char *hdr,
+ struct object_info *oi,
+ unsigned int flags)
{
const char *type_buf = hdr;
unsigned long size;
@@ -1355,14 +1356,6 @@ static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
return *hdr ? -1 : type;
}
-int parse_loose_header(const char *hdr, unsigned long *sizep)
-{
- struct object_info oi = OBJECT_INFO_INIT;
-
- oi.sizep = sizep;
- return parse_loose_header_extended(hdr, &oi, 0);
-}
-
static int loose_object_info(struct repository *r,
const struct object_id *oid,
struct object_info *oi, int flags)
@@ -1417,10 +1410,10 @@ static int loose_object_info(struct repository *r,
if (status < 0)
; /* Do nothing */
else if (hdrbuf.len) {
- if ((status = parse_loose_header_extended(hdrbuf.buf, oi, flags)) < 0)
+ if ((status = parse_loose_header(hdrbuf.buf, oi, flags)) < 0)
status = error(_("unable to parse %s header with --allow-unknown-type"),
oid_to_hex(oid));
- } else if ((status = parse_loose_header_extended(hdr, oi, flags)) < 0)
+ } else if ((status = parse_loose_header(hdr, oi, flags)) < 0)
status = error(_("unable to parse %s header"), oid_to_hex(oid));
if (status >= 0 && oi->contentp) {
@@ -2497,13 +2490,16 @@ int read_loose_object(const char *path,
const struct object_id *expected_oid,
enum object_type *type,
unsigned long *size,
- void **contents)
+ void **contents,
+ unsigned int oi_flags)
{
int ret = -1;
void *map = NULL;
unsigned long mapsize;
git_zstream stream;
char hdr[MAX_HEADER_LEN];
+ struct object_info oi = OBJECT_INFO_INIT;
+ oi.sizep = size;
*contents = NULL;
@@ -2518,7 +2514,7 @@ int read_loose_object(const char *path,
goto out;
}
- *type = parse_loose_header(hdr, size);
+ *type = parse_loose_header(hdr, &oi, oi_flags);
if (*type < 0) {
error(_("unable to parse header of %s"), path);
git_inflate_end(&stream);
@@ -245,7 +245,8 @@ int read_loose_object(const char *path,
const struct object_id *expected_oid,
enum object_type *type,
unsigned long *size,
- void **contents);
+ void **contents,
+ unsigned int oi_flags);
/* Retry packed storage after checking packed and loose storage */
#define HAS_OBJECT_RECHECK_PACKED 1
@@ -480,7 +481,8 @@ int for_each_packed_object(each_packed_object_fn, void *,
int unpack_loose_header(git_zstream *stream, unsigned char *map,
unsigned long mapsize, void *buffer,
unsigned long bufsiz);
-int parse_loose_header(const char *hdr, unsigned long *sizep);
+int parse_loose_header(const char *hdr, struct object_info *oi,
+ unsigned int flags);
int check_object_signature(struct repository *r, const struct object_id *oid,
void *buf, unsigned long size, const char *type);
int finalize_object_file(const char *tmpfile, const char *filename);
@@ -341,6 +341,9 @@ static struct stream_vtbl loose_vtbl = {
static open_method_decl(loose)
{
+ struct object_info oi2 = OBJECT_INFO_INIT;
+ oi2.sizep = &st->size;
+
st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize);
if (!st->u.loose.mapped)
return -1;
@@ -349,7 +352,7 @@ static open_method_decl(loose)
st->u.loose.mapsize,
st->u.loose.hdr,
sizeof(st->u.loose.hdr)) < 0) ||
- (parse_loose_header(st->u.loose.hdr, &st->size) < 0)) {
+ (parse_loose_header(st->u.loose.hdr, &oi2, 0) < 0)) {
git_inflate_end(&st->z);
munmap(st->u.loose.mapped, st->u.loose.mapsize);
return -1;
@@ -863,4 +863,13 @@ test_expect_success 'detect corrupt index file in fsck' '
test_i18ngrep "bad index file" errors
'
+test_expect_success 'fsck error and recovery on invalid object type' '
+ test_create_repo garbage-type &&
+ empty_blob=$(git -C garbage-type hash-object --stdin -w -t blob </dev/null) &&
+ garbage_blob=$(git -C garbage-type hash-object --stdin -w -t garbage --literally </dev/null) &&
+ test_must_fail git -C garbage-type fsck >out 2>err &&
+ grep "$garbage_blob: object corrupt or missing:" err &&
+ grep "dangling blob $empty_blob" out
+'
+
test_done
Change builtin/fsck.c to pass down a OBJECT_INFO_ALLOW_UNKNOWN_TYPE. This changes this very ungraceful error: $ git hash-object --stdin -w -t garbage --literally </dev/null <OID> $ git fsck fatal: invalid object type $ Into: $ git fsck error: hash mismatch for <OID_PATH> (expected <OID>) error: <OID>: object corrupt or missing: <OID_PATH> [ the rest of the fsck output here, i.e. it didn't hard die ] We'll still exit with non-zero, but now we'll finish the rest of the traversal. The tests that's being added here asserts that we'll still complain about other fsck issues (e.g. an unrelated dangling blob). But why are we complaining about a "hash mismatch" for an object of a type we don't know about? We shouldn't. This is the bare minimal change needed to not make fsck hard die on a repository that's been corrupted in this manner. In subsequent commits we'll teach fsck to recognize this particular type of corruption and emit a better error message. The parse_loose_header() function being changed here is only used in builtin/fsck.c, see f6371f92104 (sha1_file: add read_loose_object() function, 2017-01-13) for its introduction. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> --- builtin/fsck.c | 3 ++- object-file.c | 24 ++++++++++-------------- object-store.h | 6 ++++-- streaming.c | 5 ++++- t/t1450-fsck.sh | 9 +++++++++ 5 files changed, 29 insertions(+), 18 deletions(-)