diff mbox series

[v5,3/4] object-file: emit corruption errors when detected

Message ID a229ea0b1122f55e91f98475cd7e508f4dd8501a.1670885252.git.jonathantanmy@google.com (mailing list archive)
State Superseded
Headers show
Series Don't lazy-fetch commits when parsing them | expand

Commit Message

Jonathan Tan Dec. 12, 2022, 10:48 p.m. UTC
Instead of relying on errno being preserved across function calls, teach
do_oid_object_info_extended() to itself report object corruption when
it first detects it. There are 3 types of corruption being detected:
 - when a replacement object is missing
 - when a loose object is corrupt
 - when a packed object is corrupt and the object cannot be read
   in another way

Note that in the RHS of this patch's diff, a check for ENOENT that was
introduced in 3ba7a06552 (A loose object is not corrupt if it cannot
be read due to EMFILE, 2010-10-28) is also removed. The purpose of this
check is to avoid a false report of corruption if the errno contains
something like EMFILE (or anything that is not ENOENT), in which case
a more generic report is presented. Because, as of this patch, we no
longer rely on such a heuristic to determine corruption, but surface
the error message at the point when we read something that we did not
expect, this check is no longer necessary.

Besides being more resilient, this also prepares for a future patch in
which an indirect caller of do_oid_object_info_extended() will need
such functionality.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 object-file.c  | 55 +++++++++++++++++++++++++-------------------------
 object-store.h |  3 +++
 2 files changed, 31 insertions(+), 27 deletions(-)

Comments

Junio C Hamano Dec. 13, 2022, 1:51 a.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> diff --git a/object-file.c b/object-file.c
> index 429e3a746d..e0cef8b906 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1422,7 +1422,9 @@ static int loose_object_info(struct repository *r,
>  			     struct object_info *oi, int flags)
>  {
>  	int status = 0;
> +	int fd;
>  	unsigned long mapsize;
> +	const char *path = NULL;

It may be OK to leave this uninitialized, as long as the contract of
open_loose_object() is that a successful opening will report the path
to the loose object file that was opened.  Because ...

> @@ -1454,7 +1455,13 @@ static int loose_object_info(struct repository *r,
>  		return 0;
>  	}
>  
> -	map = map_loose_object(r, oid, &mapsize);
> +	fd = open_loose_object(r, oid, &path);
> +	if (fd < 0) {
> +		if (errno != ENOENT)
> +			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
> +		return -1;

... we no longer refer to "path" which may not be populated here, and ...

> +	}

... at this point, we know we successfully opened, and populated path.

> +	map = map_fd(fd, path, &mapsize);
>  	if (!map)
>  		return -1;
>  
> @@ -1492,6 +1499,10 @@ static int loose_object_info(struct repository *r,
>  		break;
>  	}
>  
> +	if (status && path && (flags & OBJECT_INFO_DIE_IF_CORRUPT))

Also, here, "path" should be valid, as we have successfully opened
the loose object file (otherwise we would have hit the early return
that reports only the oid_to_hex(oid)).

> +		die(_("loose object %s (stored in %s) is corrupt"),
> +		    oid_to_hex(oid), path);

If we didn't have path and did not die, we'd end up ignoring
DIE_IF_CORRUPT request and force the caller to handle non-zero
status.  But I do not think that should happen, because path would
be valid here.  No?
Jeff King Dec. 13, 2022, 10:38 a.m. UTC | #2
On Tue, Dec 13, 2022 at 10:51:57AM +0900, Junio C Hamano wrote:

> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > diff --git a/object-file.c b/object-file.c
> > index 429e3a746d..e0cef8b906 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> > @@ -1422,7 +1422,9 @@ static int loose_object_info(struct repository *r,
> >  			     struct object_info *oi, int flags)
> >  {
> >  	int status = 0;
> > +	int fd;
> >  	unsigned long mapsize;
> > +	const char *path = NULL;
> 
> It may be OK to leave this uninitialized, as long as the contract of
> open_loose_object() is that a successful opening will report the path
> to the loose object file that was opened.  Because ...

Yeah, I'd agree that this initialization can be left off (and that the
NULL checks later in the function are not needed).

-Peff
diff mbox series

Patch

diff --git a/object-file.c b/object-file.c
index 429e3a746d..e0cef8b906 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1422,7 +1422,9 @@  static int loose_object_info(struct repository *r,
 			     struct object_info *oi, int flags)
 {
 	int status = 0;
+	int fd;
 	unsigned long mapsize;
+	const char *path = NULL;
 	void *map;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
@@ -1443,7 +1445,6 @@  static int loose_object_info(struct repository *r,
 	 * object even exists.
 	 */
 	if (!oi->typep && !oi->type_name && !oi->sizep && !oi->contentp) {
-		const char *path;
 		struct stat st;
 		if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK))
 			return quick_has_loose(r, oid) ? 0 : -1;
@@ -1454,7 +1455,13 @@  static int loose_object_info(struct repository *r,
 		return 0;
 	}
 
-	map = map_loose_object(r, oid, &mapsize);
+	fd = open_loose_object(r, oid, &path);
+	if (fd < 0) {
+		if (errno != ENOENT)
+			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
+		return -1;
+	}
+	map = map_fd(fd, path, &mapsize);
 	if (!map)
 		return -1;
 
@@ -1492,6 +1499,10 @@  static int loose_object_info(struct repository *r,
 		break;
 	}
 
+	if (status && path && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
+		die(_("loose object %s (stored in %s) is corrupt"),
+		    oid_to_hex(oid), path);
+
 	git_inflate_end(&stream);
 cleanup:
 	munmap(map, mapsize);
@@ -1601,6 +1612,15 @@  static int do_oid_object_info_extended(struct repository *r,
 			continue;
 		}
 
+		if (flags & OBJECT_INFO_DIE_IF_CORRUPT) {
+			const struct packed_git *p;
+			if ((flags & OBJECT_INFO_LOOKUP_REPLACE) && !oideq(real, oid))
+				die(_("replacement %s not found for %s"),
+				    oid_to_hex(real), oid_to_hex(oid));
+			if ((p = has_packed_and_bad(r, real)))
+				die(_("packed object %s (stored in %s) is corrupt"),
+				    oid_to_hex(real), p->pack_name);
+		}
 		return -1;
 	}
 
@@ -1653,7 +1673,8 @@  int oid_object_info(struct repository *r,
 
 static void *read_object(struct repository *r,
 			 const struct object_id *oid, enum object_type *type,
-			 unsigned long *size)
+			 unsigned long *size,
+			 int die_if_corrupt)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	void *content;
@@ -1661,7 +1682,8 @@  static void *read_object(struct repository *r,
 	oi.sizep = size;
 	oi.contentp = &content;
 
-	if (oid_object_info_extended(r, oid, &oi, 0) < 0)
+	if (oid_object_info_extended(r, oid, &oi, die_if_corrupt
+				     ? OBJECT_INFO_DIE_IF_CORRUPT : 0) < 0)
 		return NULL;
 	return content;
 }
@@ -1697,35 +1719,14 @@  void *read_object_file_extended(struct repository *r,
 				int lookup_replace)
 {
 	void *data;
-	const struct packed_git *p;
-	const char *path;
-	struct stat st;
 	const struct object_id *repl = lookup_replace ?
 		lookup_replace_object(r, oid) : oid;
 
 	errno = 0;
-	data = read_object(r, repl, type, size);
+	data = read_object(r, repl, type, size, 1);
 	if (data)
 		return data;
 
-	obj_read_lock();
-	if (errno && errno != ENOENT)
-		die_errno(_("failed to read object %s"), oid_to_hex(oid));
-
-	/* die if we replaced an object with one that does not exist */
-	if (repl != oid)
-		die(_("replacement %s not found for %s"),
-		    oid_to_hex(repl), oid_to_hex(oid));
-
-	if (!stat_loose_object(r, repl, &st, &path))
-		die(_("loose object %s (stored in %s) is corrupt"),
-		    oid_to_hex(repl), path);
-
-	if ((p = has_packed_and_bad(r, repl)))
-		die(_("packed object %s (stored in %s) is corrupt"),
-		    oid_to_hex(repl), p->pack_name);
-	obj_read_unlock();
-
 	return NULL;
 }
 
@@ -2268,7 +2269,7 @@  int force_object_loose(const struct object_id *oid, time_t mtime)
 
 	if (has_loose_object(oid))
 		return 0;
-	buf = read_object(the_repository, oid, &type, &len);
+	buf = read_object(the_repository, oid, &type, &len, 0);
 	if (!buf)
 		return error(_("cannot read object for %s"), oid_to_hex(oid));
 	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
diff --git a/object-store.h b/object-store.h
index b1ec0bde82..98c1d67946 100644
--- a/object-store.h
+++ b/object-store.h
@@ -445,6 +445,9 @@  struct object_info {
  */
 #define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)
 
+/* Die if object corruption (not just an object being missing) was detected. */
+#define OBJECT_INFO_DIE_IF_CORRUPT 32
+
 int oid_object_info_extended(struct repository *r,
 			     const struct object_id *,
 			     struct object_info *, unsigned flags);