zlib.c: use size_t for size
diff mbox series

Message ID xmqqsh1bbq36.fsf@gitster-ct.c.googlers.com
State New
Headers show
Series
  • zlib.c: use size_t for size
Related show

Commit Message

Junio C Hamano Oct. 12, 2018, 7:07 a.m. UTC
From: Martin Koegler <martin.koegler@chello.at>
Date: Thu, 10 Aug 2017 20:13:08 +0200

Signed-off-by: Martin Koegler <martin.koegler@chello.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * I made minimal adjustments to make the change apply to today's
   codebase.  I still find some choices and mixing of off_t and
   size_t done by the patch a bit iffy, and some arith may need to
   become st_addX().  Extra set of eyes are very much appreciated.

 builtin/pack-objects.c | 10 +++++-----
 cache.h                | 10 +++++-----
 pack-check.c           |  6 +++---
 pack.h                 |  2 +-
 packfile.h             |  2 +-
 wrapper.c              |  8 ++++----
 zlib.c                 |  8 ++++----
 7 files changed, 23 insertions(+), 23 deletions(-)

Comments

Johannes Schindelin Oct. 12, 2018, 9:54 a.m. UTC | #1
Hi Junio,

On Fri, 12 Oct 2018, Junio C Hamano wrote:

> From: Martin Koegler <martin.koegler@chello.at>
> Date: Thu, 10 Aug 2017 20:13:08 +0200
> 
> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
> 
>  * I made minimal adjustments to make the change apply to today's
>    codebase.  I still find some choices and mixing of off_t and
>    size_t done by the patch a bit iffy, and some arith may need to
>    become st_addX().  Extra set of eyes are very much appreciated.
> 
>  builtin/pack-objects.c | 10 +++++-----
>  cache.h                | 10 +++++-----
>  pack-check.c           |  6 +++---
>  pack.h                 |  2 +-
>  packfile.h             |  2 +-
>  wrapper.c              |  8 ++++----
>  zlib.c                 |  8 ++++----
>  7 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index e6316d294d..b9ca04eb8a 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -266,15 +266,15 @@ static void copy_pack_data(struct hashfile *f,
>  		struct packed_git *p,
>  		struct pack_window **w_curs,
>  		off_t offset,
> -		off_t len)
> +		size_t len)
>  {
>  	unsigned char *in;
> -	unsigned long avail;
> +	size_t avail;
>  
>  	while (len) {
>  		in = use_pack(p, w_curs, offset, &avail);
>  		if (avail > len)

Do we have to be careful to handle sizeof(off_t) != sizeof(size_t) here? I
guess we would receive a compiler warning about different sizes in that
case, but it makes me worry.

Not that it matters, as we currently have an *even worse* situation where
we *know* that `unsigned long` is 4 bytes on 64-bit Windows, and `off_t`
(the version we use, at least) is 8 bytes.

> -			avail = (unsigned long)len;
> +		avail = len;

Indentation change?

Apart from the consideration about different sizes of the data types
(which could maybe addressed via a comment in the commit message, after
thinking deeply about it), I think this patch is good to go.

Ciao,
Dscho

>  		hashwrite(f, in, avail);
>  		offset += avail;
>  		len -= avail;
> @@ -1478,8 +1478,8 @@ static void check_object(struct object_entry *entry)
>  		struct pack_window *w_curs = NULL;
>  		const unsigned char *base_ref = NULL;
>  		struct object_entry *base_entry;
> -		unsigned long used, used_0;
> -		unsigned long avail;
> +		size_t used, used_0;
> +		size_t avail;
>  		off_t ofs;
>  		unsigned char *buf, c;
>  		enum object_type type;
> diff --git a/cache.h b/cache.h
> index d508f3d4f8..fce53fe620 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -20,10 +20,10 @@
>  #include <zlib.h>
>  typedef struct git_zstream {
>  	z_stream z;
> -	unsigned long avail_in;
> -	unsigned long avail_out;
> -	unsigned long total_in;
> -	unsigned long total_out;
> +	size_t avail_in;
> +	size_t avail_out;
> +	size_t total_in;
> +	size_t total_out;
>  	unsigned char *next_in;
>  	unsigned char *next_out;
>  } git_zstream;
> @@ -40,7 +40,7 @@ void git_deflate_end(git_zstream *);
>  int git_deflate_abort(git_zstream *);
>  int git_deflate_end_gently(git_zstream *);
>  int git_deflate(git_zstream *, int flush);
> -unsigned long git_deflate_bound(git_zstream *, unsigned long);
> +size_t git_deflate_bound(git_zstream *, size_t);
>  
>  /* The length in bytes and in hex digits of an object name (SHA-1 value). */
>  #define GIT_SHA1_RAWSZ 20
> diff --git a/pack-check.c b/pack-check.c
> index fa5f0ff8fa..575e3e7125 100644
> --- a/pack-check.c
> +++ b/pack-check.c
> @@ -27,13 +27,13 @@ static int compare_entries(const void *e1, const void *e2)
>  }
>  
>  int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
> -		   off_t offset, off_t len, unsigned int nr)
> +		   off_t offset, size_t len, unsigned int nr)
>  {
>  	const uint32_t *index_crc;
>  	uint32_t data_crc = crc32(0, NULL, 0);
>  
>  	do {
> -		unsigned long avail;
> +		size_t avail;
>  		void *data = use_pack(p, w_curs, offset, &avail);
>  		if (avail > len)
>  			avail = len;
> @@ -68,7 +68,7 @@ static int verify_packfile(struct packed_git *p,
>  
>  	the_hash_algo->init_fn(&ctx);
>  	do {
> -		unsigned long remaining;
> +		size_t remaining;
>  		unsigned char *in = use_pack(p, w_curs, offset, &remaining);
>  		offset += remaining;
>  		if (!pack_sig_ofs)
> diff --git a/pack.h b/pack.h
> index 34a9d458b4..1c9fecf929 100644
> --- a/pack.h
> +++ b/pack.h
> @@ -78,7 +78,7 @@ struct progress;
>  typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*);
>  
>  extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1);
> -extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
> +extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, size_t len, unsigned int nr);
>  extern int verify_pack_index(struct packed_git *);
>  extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
>  extern off_t write_pack_header(struct hashfile *f, uint32_t);
> diff --git a/packfile.h b/packfile.h
> index 442625723d..e2daf63426 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -78,7 +78,7 @@ extern void close_pack_index(struct packed_git *);
>  
>  extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
>  
> -extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
> +extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, size_t *);
>  extern void close_pack_windows(struct packed_git *);
>  extern void close_pack(struct packed_git *);
>  extern void close_all_packs(struct raw_object_store *o);
> diff --git a/wrapper.c b/wrapper.c
> index e4fa9d84cd..1a510bd6fc 100644
> --- a/wrapper.c
> +++ b/wrapper.c
> @@ -67,11 +67,11 @@ static void *do_xmalloc(size_t size, int gentle)
>  			ret = malloc(1);
>  		if (!ret) {
>  			if (!gentle)
> -				die("Out of memory, malloc failed (tried to allocate %lu bytes)",
> -				    (unsigned long)size);
> +				die("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
> +				    (uintmax_t)size);
>  			else {
> -				error("Out of memory, malloc failed (tried to allocate %lu bytes)",
> -				      (unsigned long)size);
> +				error("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
> +				      (uintmax_t)size);
>  				return NULL;
>  			}
>  		}
> diff --git a/zlib.c b/zlib.c
> index d594cba3fc..197a1acc7b 100644
> --- a/zlib.c
> +++ b/zlib.c
> @@ -29,7 +29,7 @@ static const char *zerr_to_string(int status)
>   */
>  /* #define ZLIB_BUF_MAX ((uInt)-1) */
>  #define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
> -static inline uInt zlib_buf_cap(unsigned long len)
> +static inline uInt zlib_buf_cap(size_t len)
>  {
>  	return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
>  }
> @@ -46,8 +46,8 @@ static void zlib_pre_call(git_zstream *s)
>  
>  static void zlib_post_call(git_zstream *s)
>  {
> -	unsigned long bytes_consumed;
> -	unsigned long bytes_produced;
> +	size_t bytes_consumed;
> +	size_t bytes_produced;
>  
>  	bytes_consumed = s->z.next_in - s->next_in;
>  	bytes_produced = s->z.next_out - s->next_out;
> @@ -150,7 +150,7 @@ int git_inflate(git_zstream *strm, int flush)
>  #define deflateBound(c,s)  ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
>  #endif
>  
> -unsigned long git_deflate_bound(git_zstream *strm, unsigned long size)
> +size_t git_deflate_bound(git_zstream *strm, size_t size)
>  {
>  	return deflateBound(&strm->z, size);
>  }
> -- 
> 2.19.1-328-g5a0cc8aca7
> 
>
Junio C Hamano Oct. 12, 2018, 1:52 p.m. UTC | #2
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Hi Junio,
>
> On Fri, 12 Oct 2018, Junio C Hamano wrote:
>
>> From: Martin Koegler <martin.koegler@chello.at>
>> Date: Thu, 10 Aug 2017 20:13:08 +0200
>> 
>> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
>> Signed-off-by: Junio C Hamano <gitster@pobox.com>
>> ---
>> 
>>  * I made minimal adjustments to make the change apply to today's
>>    codebase.  I still find some choices and mixing of off_t and
>>    size_t done by the patch a bit iffy, and some arith may need to
>>    become st_addX().  Extra set of eyes are very much appreciated.
>> 
>>  builtin/pack-objects.c | 10 +++++-----
>>  cache.h                | 10 +++++-----
>>  pack-check.c           |  6 +++---
>>  pack.h                 |  2 +-
>>  packfile.h             |  2 +-
>>  wrapper.c              |  8 ++++----
>>  zlib.c                 |  8 ++++----
>>  7 files changed, 23 insertions(+), 23 deletions(-)
>> 
>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>> index e6316d294d..b9ca04eb8a 100644
>> --- a/builtin/pack-objects.c
>> +++ b/builtin/pack-objects.c
>> @@ -266,15 +266,15 @@ static void copy_pack_data(struct hashfile *f,
>>  		struct packed_git *p,
>>  		struct pack_window **w_curs,
>>  		off_t offset,
>> -		off_t len)
>> +		size_t len)
>>  {
>>  	unsigned char *in;
>> -	unsigned long avail;
>> +	size_t avail;
>>  
>>  	while (len) {
>>  		in = use_pack(p, w_curs, offset, &avail);
>>  		if (avail > len)
>
> Do we have to be careful to handle sizeof(off_t) != sizeof(size_t) here? I
> guess we would receive a compiler warning about different sizes in that
> case, but it makes me worry.

Yup, you just said exactly the same thing as I already said in the
part you quoted.  I still find the mixed use of off_t and size_t in
this patch iffy, and that was the secondary reason why the patch was
kept in the stalled state for so long.  The primary reason was that
nobody tried to dust it off and reignite the topic so far---which I
am trying to correct, but as I said, this is just minimally adjusted
to today's codebase, without any attempt to improve relative to the
original patch.

I think we are in agreement in that this is not making things worse,
as we are already in the mixed arith territory before this patch.
Johannes Schindelin Oct. 12, 2018, 3:34 p.m. UTC | #3
Hi Junio,

On Fri, 12 Oct 2018, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > Hi Junio,
> >
> > On Fri, 12 Oct 2018, Junio C Hamano wrote:
> >
> >> From: Martin Koegler <martin.koegler@chello.at>
> >> Date: Thu, 10 Aug 2017 20:13:08 +0200
> >> 
> >> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
> >> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> >> ---
> >> 
> >>  * I made minimal adjustments to make the change apply to today's
> >>    codebase.  I still find some choices and mixing of off_t and
> >>    size_t done by the patch a bit iffy, and some arith may need to
> >>    become st_addX().  Extra set of eyes are very much appreciated.
> >> 
> >>  builtin/pack-objects.c | 10 +++++-----
> >>  cache.h                | 10 +++++-----
> >>  pack-check.c           |  6 +++---
> >>  pack.h                 |  2 +-
> >>  packfile.h             |  2 +-
> >>  wrapper.c              |  8 ++++----
> >>  zlib.c                 |  8 ++++----
> >>  7 files changed, 23 insertions(+), 23 deletions(-)
> >> 
> >> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> >> index e6316d294d..b9ca04eb8a 100644
> >> --- a/builtin/pack-objects.c
> >> +++ b/builtin/pack-objects.c
> >> @@ -266,15 +266,15 @@ static void copy_pack_data(struct hashfile *f,
> >>  		struct packed_git *p,
> >>  		struct pack_window **w_curs,
> >>  		off_t offset,
> >> -		off_t len)
> >> +		size_t len)
> >>  {
> >>  	unsigned char *in;
> >> -	unsigned long avail;
> >> +	size_t avail;
> >>  
> >>  	while (len) {
> >>  		in = use_pack(p, w_curs, offset, &avail);
> >>  		if (avail > len)
> >
> > Do we have to be careful to handle sizeof(off_t) != sizeof(size_t) here? I
> > guess we would receive a compiler warning about different sizes in that
> > case, but it makes me worry.
> 
> Yup, you just said exactly the same thing as I already said in the
> part you quoted.  I still find the mixed use of off_t and size_t in
> this patch iffy, and that was the secondary reason why the patch was
> kept in the stalled state for so long.  The primary reason was that
> nobody tried to dust it off and reignite the topic so far---which I
> am trying to correct, but as I said, this is just minimally adjusted
> to today's codebase, without any attempt to improve relative to the
> original patch.
> 
> I think we are in agreement in that this is not making things worse,
> as we are already in the mixed arith territory before this patch.

I will *try* to find the time to audit this some more, then. Maybe next
week, maybe the one afterwards... ;-)

Ciao,
Dscho

> 
>
Ramsay Jones Oct. 12, 2018, 11:23 p.m. UTC | #4
On 12/10/18 16:34, Johannes Schindelin wrote:
> Hi Junio,
> 
> On Fri, 12 Oct 2018, Junio C Hamano wrote:
> 
>> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>>
>>> Hi Junio,
>>>
>>> On Fri, 12 Oct 2018, Junio C Hamano wrote:
>>>
>>>> From: Martin Koegler <martin.koegler@chello.at>
>>>> Date: Thu, 10 Aug 2017 20:13:08 +0200
>>>>
>>>> Signed-off-by: Martin Koegler <martin.koegler@chello.at>
>>>> Signed-off-by: Junio C Hamano <gitster@pobox.com>
>>>> ---
>>>>
>>>>  * I made minimal adjustments to make the change apply to today's
>>>>    codebase.  I still find some choices and mixing of off_t and
>>>>    size_t done by the patch a bit iffy, and some arith may need to
>>>>    become st_addX().  Extra set of eyes are very much appreciated.
>>>>
>>>>  builtin/pack-objects.c | 10 +++++-----
>>>>  cache.h                | 10 +++++-----
>>>>  pack-check.c           |  6 +++---
>>>>  pack.h                 |  2 +-
>>>>  packfile.h             |  2 +-
>>>>  wrapper.c              |  8 ++++----
>>>>  zlib.c                 |  8 ++++----
>>>>  7 files changed, 23 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>>>> index e6316d294d..b9ca04eb8a 100644
>>>> --- a/builtin/pack-objects.c
>>>> +++ b/builtin/pack-objects.c
>>>> @@ -266,15 +266,15 @@ static void copy_pack_data(struct hashfile *f,
>>>>  		struct packed_git *p,
>>>>  		struct pack_window **w_curs,
>>>>  		off_t offset,
>>>> -		off_t len)
>>>> +		size_t len)
>>>>  {
>>>>  	unsigned char *in;
>>>> -	unsigned long avail;
>>>> +	size_t avail;
>>>>  
>>>>  	while (len) {
>>>>  		in = use_pack(p, w_curs, offset, &avail);
>>>>  		if (avail > len)
>>>
>>> Do we have to be careful to handle sizeof(off_t) != sizeof(size_t) here? I
>>> guess we would receive a compiler warning about different sizes in that
>>> case, but it makes me worry.
>>
>> Yup, you just said exactly the same thing as I already said in the
>> part you quoted.  I still find the mixed use of off_t and size_t in
>> this patch iffy, and that was the secondary reason why the patch was
>> kept in the stalled state for so long.  The primary reason was that
>> nobody tried to dust it off and reignite the topic so far---which I
>> am trying to correct, but as I said, this is just minimally adjusted
>> to today's codebase, without any attempt to improve relative to the
>> original patch.
>>
>> I think we are in agreement in that this is not making things worse,
>> as we are already in the mixed arith territory before this patch.
> 
> I will *try* to find the time to audit this some more, then. Maybe next
> week, maybe the one afterwards... ;-)

This fails to compile on 32-bit Linux. The patch given below is
what I used to get git to build on 32-bit Linux (and it even
passes the tests).

I haven't even given the off_t -> size_t issue any thought, but I
suspect that will have to change as well. Anyway, I have no more
time tonight ...

ATB,
Ramsay Jones

-- >8 --
Subject: [PATCH] zlib: minimum fix-up on 32-bit linux

Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
---
 builtin/pack-objects.c |  3 ++-
 packfile.c             | 16 ++++++++--------
 packfile.h             |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 7e693071b..cc11a0426 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2002,7 +2002,8 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
 	struct pack_window *w_curs;
 	unsigned char *buf;
 	enum object_type type;
-	unsigned long used, avail, size;
+	size_t used, avail;
+	unsigned long size;
 
 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
 		read_lock();
diff --git a/packfile.c b/packfile.c
index f2850a00b..7571ac988 100644
--- a/packfile.c
+++ b/packfile.c
@@ -585,7 +585,7 @@ static int in_window(struct pack_window *win, off_t offset)
 unsigned char *use_pack(struct packed_git *p,
 		struct pack_window **w_cursor,
 		off_t offset,
-		unsigned long *left)
+		size_t *left)
 {
 	struct pack_window *win = *w_cursor;
 
@@ -1034,19 +1034,19 @@ struct list_head *get_packed_git_mru(struct repository *r)
 	return &r->objects->packed_git_mru;
 }
 
-unsigned long unpack_object_header_buffer(const unsigned char *buf,
-		unsigned long len, enum object_type *type, unsigned long *sizep)
+size_t unpack_object_header_buffer(const unsigned char *buf,
+		size_t len, enum object_type *type, unsigned long *sizep)
 {
 	unsigned shift;
-	unsigned long size, c;
-	unsigned long used = 0;
+	size_t size, c;
+	size_t used = 0;
 
 	c = buf[used++];
 	*type = (c >> 4) & 7;
 	size = c & 15;
 	shift = 4;
 	while (c & 0x80) {
-		if (len <= used || bitsizeof(long) <= shift) {
+		if (len <= used || bitsizeof(size_t) <= shift) {
 			error("bad object header");
 			size = used = 0;
 			break;
@@ -1104,8 +1104,8 @@ int unpack_object_header(struct packed_git *p,
 			 unsigned long *sizep)
 {
 	unsigned char *base;
-	unsigned long left;
-	unsigned long used;
+	size_t left;
+	size_t used;
 	enum object_type type;
 
 	/* use_pack() assures us we have [base, base + 20) available
diff --git a/packfile.h b/packfile.h
index 1fb482424..e8bf11b62 100644
--- a/packfile.h
+++ b/packfile.h
@@ -132,7 +132,7 @@ extern off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *)
 
 extern int is_pack_valid(struct packed_git *);
 extern void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
-extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
+extern size_t unpack_object_header_buffer(const unsigned char *buf, size_t len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
Jeff King Oct. 13, 2018, 2:38 a.m. UTC | #5
On Fri, Oct 12, 2018 at 04:07:25PM +0900, Junio C Hamano wrote:

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index e6316d294d..b9ca04eb8a 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -266,15 +266,15 @@ static void copy_pack_data(struct hashfile *f,
>  		struct packed_git *p,
>  		struct pack_window **w_curs,
>  		off_t offset,
> -		off_t len)
> +		size_t len)
>  {
>  	unsigned char *in;
> -	unsigned long avail;
> +	size_t avail;

I know there were a lot of comments about "maybe this off_t switch is
not good". Let me say something a bit stronger: I think this part of the
change is strictly worse.

copy_pack_data() looks like this right now:

  static void copy_pack_data(struct hashfile *f,
                  struct packed_git *p,
                  struct pack_window **w_curs,
                  off_t offset,
                  off_t len)
  {
          unsigned char *in;
          unsigned long avail;
  
          while (len) {
                  in = use_pack(p, w_curs, offset, &avail);
                  if (avail > len)
                          avail = (unsigned long)len;
                  hashwrite(f, in, avail);
                  offset += avail;
                  len -= avail;
          }
  }

So right now let's imagine that off_t is 64-bit, and "unsigned long" is
32-bit (e.g., 32-bit system, or an IL32P64 model like Windows). We'll
repeatedly ask use_pack() for a window, and it will tell us how many
bytes we have in "avail". So even as a 32-bit value, that just means
we'll process chunks smaller than 4GB, and this is correct (or at least
this part of it -- hold on). But we can still process the whole "len"
given by the off_t eventually.

But by switching away from off_t in the function interface, we risk
truncation before we even enter the loop. Because of the switch to
size_t, it actually works on an IL32P64 system (because size_t is big
there), but it has introduced a bug on a true 32-bit system. If your
off_t really is 64-bit (and it generally is because we #define
_FILE_OFFSET_BITS), the function will truncate modulo 2^32.

And nor will most compilers warn without -Wconversion. You can try it
with this on Linux:

  #define _FILE_OFFSET_BITS 64
  #include <unistd.h>
  
  void foo(size_t x);
  void bar(off_t x);
  
  void bar(off_t x)
  {
  
  	foo(x);
  }

That compiles fine with "gcc -c -m32 -Wall -Werror -Wextra" for me.
Adding "-Wconversion" catches it, but our code base is not close to
compiling with that warning enabled.

So I don't think this hunk is actually fixing any problems, and is
actually introducing one.

I do in general support moving to size_t over "unsigned long". Switching
avail to size_t makes sense here. It's just the off_t part that is
funny.

-Peff
Jeff King Oct. 13, 2018, 2:46 a.m. UTC | #6
On Fri, Oct 12, 2018 at 10:38:45PM -0400, Jeff King wrote:

> So right now let's imagine that off_t is 64-bit, and "unsigned long" is
> 32-bit (e.g., 32-bit system, or an IL32P64 model like Windows). We'll
> repeatedly ask use_pack() for a window, and it will tell us how many
> bytes we have in "avail". So even as a 32-bit value, that just means
> we'll process chunks smaller than 4GB, and this is correct (or at least
> this part of it -- hold on). But we can still process the whole "len"
> given by the off_t eventually.

So this "hold on" was because I thought I had found another bug in
use_pack(), but I actually think it's OK.

In use_pack(), we do this:

          if (left)
                  *left = win->len - xsize_t(offset);

where win->len is a size_t. Before this patch, "left" is a pointer to
unsigned long. So that has the usual broken-on-Windows mismatch. This
patch makes it a size_t, which is good.

But what's up with that xsize_t(offset)? We'll complain about truncation
_before_ we do any offset subtraction. So at first glance, I thought
this meant we were broken for larger-than-4GB packs on 32-bit systems
when trying to read past the 4GB mark.

But no, right before that we have this line:

          offset -= win->offset;

So offset is in fact no longer its original meaning of "offset into the
packfile", but is now an offset of the specific request into the window
we found.

So I think it's correct, but it sure confused me. I wonder if another
variable might help, like:

  size_t offset_in_window;
  ...

  /*
   * We know this difference will fit in a size_t, because our mmap
   * window by * definition can be no larger than a size_t.
   */
  offset_in_window = xsize_t(offset - win->offset);
  if (left)
	*left = win->len - offset_in_window;
  return win->base + offset_in_window;

I dunno. Maybe it is overkill.

-Peff
Johannes Sixt Oct. 13, 2018, 8:43 a.m. UTC | #7
Am 13.10.18 um 04:46 schrieb Jeff King:
> But no, right before that we have this line:
> 
>            offset -= win->offset;
> 
> So offset is in fact no longer its original meaning of "offset into the
> packfile", but is now an offset of the specific request into the window
> we found.
> 
> So I think it's correct, but it sure confused me. I wonder if another
> variable might help, like:
> 
>    size_t offset_in_window;
>    ...
> 
>    /*
>     * We know this difference will fit in a size_t, because our mmap
>     * window by * definition can be no larger than a size_t.
>     */
>    offset_in_window = xsize_t(offset - win->offset);
>    if (left)
> 	*left = win->len - offset_in_window;
>    return win->base + offset_in_window;
> 
> I dunno. Maybe it is overkill.

Thank you for your analysis. No, I don't think that such a new variable 
would be overkill. It is important to make such knowledge of value 
magnitudes explicit precisely because it reduces confusion and helps 
reviewers of the code verify correctness.

-- Hannes

Patch
diff mbox series

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e6316d294d..b9ca04eb8a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -266,15 +266,15 @@  static void copy_pack_data(struct hashfile *f,
 		struct packed_git *p,
 		struct pack_window **w_curs,
 		off_t offset,
-		off_t len)
+		size_t len)
 {
 	unsigned char *in;
-	unsigned long avail;
+	size_t avail;
 
 	while (len) {
 		in = use_pack(p, w_curs, offset, &avail);
 		if (avail > len)
-			avail = (unsigned long)len;
+		avail = len;
 		hashwrite(f, in, avail);
 		offset += avail;
 		len -= avail;
@@ -1478,8 +1478,8 @@  static void check_object(struct object_entry *entry)
 		struct pack_window *w_curs = NULL;
 		const unsigned char *base_ref = NULL;
 		struct object_entry *base_entry;
-		unsigned long used, used_0;
-		unsigned long avail;
+		size_t used, used_0;
+		size_t avail;
 		off_t ofs;
 		unsigned char *buf, c;
 		enum object_type type;
diff --git a/cache.h b/cache.h
index d508f3d4f8..fce53fe620 100644
--- a/cache.h
+++ b/cache.h
@@ -20,10 +20,10 @@ 
 #include <zlib.h>
 typedef struct git_zstream {
 	z_stream z;
-	unsigned long avail_in;
-	unsigned long avail_out;
-	unsigned long total_in;
-	unsigned long total_out;
+	size_t avail_in;
+	size_t avail_out;
+	size_t total_in;
+	size_t total_out;
 	unsigned char *next_in;
 	unsigned char *next_out;
 } git_zstream;
@@ -40,7 +40,7 @@  void git_deflate_end(git_zstream *);
 int git_deflate_abort(git_zstream *);
 int git_deflate_end_gently(git_zstream *);
 int git_deflate(git_zstream *, int flush);
-unsigned long git_deflate_bound(git_zstream *, unsigned long);
+size_t git_deflate_bound(git_zstream *, size_t);
 
 /* The length in bytes and in hex digits of an object name (SHA-1 value). */
 #define GIT_SHA1_RAWSZ 20
diff --git a/pack-check.c b/pack-check.c
index fa5f0ff8fa..575e3e7125 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -27,13 +27,13 @@  static int compare_entries(const void *e1, const void *e2)
 }
 
 int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, off_t len, unsigned int nr)
+		   off_t offset, size_t len, unsigned int nr)
 {
 	const uint32_t *index_crc;
 	uint32_t data_crc = crc32(0, NULL, 0);
 
 	do {
-		unsigned long avail;
+		size_t avail;
 		void *data = use_pack(p, w_curs, offset, &avail);
 		if (avail > len)
 			avail = len;
@@ -68,7 +68,7 @@  static int verify_packfile(struct packed_git *p,
 
 	the_hash_algo->init_fn(&ctx);
 	do {
-		unsigned long remaining;
+		size_t remaining;
 		unsigned char *in = use_pack(p, w_curs, offset, &remaining);
 		offset += remaining;
 		if (!pack_sig_ofs)
diff --git a/pack.h b/pack.h
index 34a9d458b4..1c9fecf929 100644
--- a/pack.h
+++ b/pack.h
@@ -78,7 +78,7 @@  struct progress;
 typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*);
 
 extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1);
-extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
+extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, size_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
 extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
 extern off_t write_pack_header(struct hashfile *f, uint32_t);
diff --git a/packfile.h b/packfile.h
index 442625723d..e2daf63426 100644
--- a/packfile.h
+++ b/packfile.h
@@ -78,7 +78,7 @@  extern void close_pack_index(struct packed_git *);
 
 extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
 
-extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
+extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, size_t *);
 extern void close_pack_windows(struct packed_git *);
 extern void close_pack(struct packed_git *);
 extern void close_all_packs(struct raw_object_store *o);
diff --git a/wrapper.c b/wrapper.c
index e4fa9d84cd..1a510bd6fc 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -67,11 +67,11 @@  static void *do_xmalloc(size_t size, int gentle)
 			ret = malloc(1);
 		if (!ret) {
 			if (!gentle)
-				die("Out of memory, malloc failed (tried to allocate %lu bytes)",
-				    (unsigned long)size);
+				die("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
+				    (uintmax_t)size);
 			else {
-				error("Out of memory, malloc failed (tried to allocate %lu bytes)",
-				      (unsigned long)size);
+				error("Out of memory, malloc failed (tried to allocate %" PRIuMAX " bytes)",
+				      (uintmax_t)size);
 				return NULL;
 			}
 		}
diff --git a/zlib.c b/zlib.c
index d594cba3fc..197a1acc7b 100644
--- a/zlib.c
+++ b/zlib.c
@@ -29,7 +29,7 @@  static const char *zerr_to_string(int status)
  */
 /* #define ZLIB_BUF_MAX ((uInt)-1) */
 #define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
-static inline uInt zlib_buf_cap(unsigned long len)
+static inline uInt zlib_buf_cap(size_t len)
 {
 	return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
 }
@@ -46,8 +46,8 @@  static void zlib_pre_call(git_zstream *s)
 
 static void zlib_post_call(git_zstream *s)
 {
-	unsigned long bytes_consumed;
-	unsigned long bytes_produced;
+	size_t bytes_consumed;
+	size_t bytes_produced;
 
 	bytes_consumed = s->z.next_in - s->next_in;
 	bytes_produced = s->z.next_out - s->next_out;
@@ -150,7 +150,7 @@  int git_inflate(git_zstream *strm, int flush)
 #define deflateBound(c,s)  ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
 #endif
 
-unsigned long git_deflate_bound(git_zstream *strm, unsigned long size)
+size_t git_deflate_bound(git_zstream *strm, size_t size)
 {
 	return deflateBound(&strm->z, size);
 }