diff mbox series

[v3,7/8] odb: guard against data loss checking out a huge file

Message ID f59c523bcc48b187680be9149e9311f15bfe06dc.1635515959.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Allow clean/smudge filters to handle huge files in the LLP64 data model | expand

Commit Message

Matt Cooper Oct. 29, 2021, 1:59 p.m. UTC
From: Matt Cooper <vtbassmatt@gmail.com>

This introduces an additional guard for platforms where `unsigned long`
and `size_t` are not of the same size. If the size of an object in the
database would overflow `unsigned long`, instead we now exit with an
error.

A complete fix will have to update _many_ other functions throughout the
codebase to use `size_t` instead of `unsigned long`. It will have to be
implemented at some stage.

This commit puts in a stop-gap for the time being.

Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Matt Cooper <vtbassmatt@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 delta.h       | 6 +++---
 object-file.c | 6 +++---
 packfile.c    | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

Comments

Junio C Hamano Oct. 29, 2021, 11:13 p.m. UTC | #1
"Matt Cooper via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/delta.h b/delta.h
> index 2df5fe13d95..8a56ec07992 100644
> --- a/delta.h
> +++ b/delta.h
> @@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
>  					       const unsigned char *top)
>  {
>  	const unsigned char *data = *datap;
> -	unsigned long cmd, size = 0;
> +	size_t cmd, size = 0;
>  	int i = 0;
>  	do {
>  		cmd = *data++;
> -		size |= (cmd & 0x7f) << i;
> +		size |= st_left_shift(cmd & 0x7f, i);
>  		i += 7;
>  	} while (cmd & 0x80 && data < top);
>  	*datap = data;
> -	return size;
> +	return cast_size_t_to_ulong(size);
>  }

OK.  The patch-delta code is reasonably self contained, so the next
step to measure the in-core object size in size_t shouldn't be too
bad, but before everything got size_t aware, we have to have the
"safe casting" somewhere, and I agree this "immediately before
return" is a good place to draw the line.

Nicely done.

>  			if (c > 9)
>  				break;
>  			hdr++;
> -			size = size * 10 + c;
> +			size = st_add(st_mult(size, 10), c);

Nice.

>  	if (oi->sizep)
> -		*oi->sizep = size;
> +		*oi->sizep = cast_size_t_to_ulong(size);

OK.

> @@ -1073,10 +1073,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
>  			break;
>  		}
>  		c = buf[used++];
> -		size += (c & 0x7f) << shift;
> +		size = st_add(size, st_left_shift(c & 0x7f, shift));
>  		shift += 7;
>  	}
> -	*sizep = size;
> +	*sizep = cast_size_t_to_ulong(size);

OK.

Looking good.
diff mbox series

Patch

diff --git a/delta.h b/delta.h
index 2df5fe13d95..8a56ec07992 100644
--- a/delta.h
+++ b/delta.h
@@ -90,15 +90,15 @@  static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
 					       const unsigned char *top)
 {
 	const unsigned char *data = *datap;
-	unsigned long cmd, size = 0;
+	size_t cmd, size = 0;
 	int i = 0;
 	do {
 		cmd = *data++;
-		size |= (cmd & 0x7f) << i;
+		size |= st_left_shift(cmd & 0x7f, i);
 		i += 7;
 	} while (cmd & 0x80 && data < top);
 	*datap = data;
-	return size;
+	return cast_size_t_to_ulong(size);
 }
 
 #endif
diff --git a/object-file.c b/object-file.c
index f233b440b22..70e456fc2a3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1344,7 +1344,7 @@  static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
 				       unsigned int flags)
 {
 	const char *type_buf = hdr;
-	unsigned long size;
+	size_t size;
 	int type, type_len = 0;
 
 	/*
@@ -1388,12 +1388,12 @@  static int parse_loose_header_extended(const char *hdr, struct object_info *oi,
 			if (c > 9)
 				break;
 			hdr++;
-			size = size * 10 + c;
+			size = st_add(st_mult(size, 10), c);
 		}
 	}
 
 	if (oi->sizep)
-		*oi->sizep = size;
+		*oi->sizep = cast_size_t_to_ulong(size);
 
 	/*
 	 * The length must be followed by a zero byte
diff --git a/packfile.c b/packfile.c
index 755aa7aec5e..3ccea004396 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1059,7 +1059,7 @@  unsigned long unpack_object_header_buffer(const unsigned char *buf,
 		unsigned long len, enum object_type *type, unsigned long *sizep)
 {
 	unsigned shift;
-	unsigned long size, c;
+	size_t size, c;
 	unsigned long used = 0;
 
 	c = buf[used++];
@@ -1073,10 +1073,10 @@  unsigned long unpack_object_header_buffer(const unsigned char *buf,
 			break;
 		}
 		c = buf[used++];
-		size += (c & 0x7f) << shift;
+		size = st_add(size, st_left_shift(c & 0x7f, shift));
 		shift += 7;
 	}
-	*sizep = size;
+	*sizep = cast_size_t_to_ulong(size);
 	return used;
 }