mbox series

[v2,00/10] reftable: fix -Wsign-compare warnings

Message ID 20250120-b4-pks-reftable-sign-compare-v2-0-b4566d02e4a5@pks.im (mailing list archive)
Headers show
Series reftable: fix -Wsign-compare warnings | expand

Message

Patrick Steinhardt Jan. 20, 2025, 4:17 p.m. UTC
Hi,

during the last steps of converting the reftable codebase to become a
standalone library I noticed that the new -Wsign-compare warnings
created a bit of a problem due to the `DISABLE_SIGN_COMPARE_WARNINGS`
macro that we started using. As a consequence I wasn't able to easily
drop "git-compat-util.h" anymore. This patch series is thus addresses
the issue by fixing all sign comparison warnings in the reftable
library.

Changes in v2:
  - Document why we add/subtract 1 when encoding and decoding varints.
  - Constistently use hex representations when encoding varints.
  - Use `+` instead of `|` to add values in varint decoding -- it
    shouldn't make a difference, but is closer to what Git's own
    algorithm uses.
  - Link to v1: https://lore.kernel.org/r/20250116-b4-pks-reftable-sign-compare-v1-0-bd30e2ee96e7@pks.im

Thanks!

Patrick

---
Patrick Steinhardt (10):
      meson: stop disabling -Wsign-compare
      reftable/record: drop unused `print` function pointer
      reftable/record: handle overflows when decoding varints
      reftable/basics: adjust `common_prefix_size()` to return `size_t`
      reftable/basics: adjust `hash_size()` to return `uint32_t`
      reftable/block: adapt header and footer size to return a `size_t`
      reftable/block: adjust type of the restart length
      reftable/blocksource: adjust type of the block length
      reftable/blocksource: adjust `read_block()` to return `ssize_t`
      reftable: address trivial -Wsign-compare warnings

 meson.build                         |   1 -
 reftable/basics.c                   |  10 ++-
 reftable/basics.h                   |   4 +-
 reftable/block.c                    |  20 +++---
 reftable/block.h                    |  14 ++--
 reftable/blocksource.c              |   8 +--
 reftable/reader.c                   |  32 +++++----
 reftable/reader.h                   |   6 +-
 reftable/record.c                   | 125 +++++++++++++++++-------------------
 reftable/record.h                   |  25 ++++----
 reftable/reftable-blocksource.h     |  13 ++--
 reftable/reftable-record.h          |   4 +-
 reftable/reftable-writer.h          |   2 +-
 reftable/stack.c                    |  12 ++--
 reftable/system.h                   |   2 -
 reftable/writer.c                   |   7 +-
 t/unit-tests/t-reftable-basics.c    |   2 +-
 t/unit-tests/t-reftable-readwrite.c |   2 +-
 t/unit-tests/t-reftable-record.c    |  19 +++++-
 19 files changed, 157 insertions(+), 151 deletions(-)

Range-diff versus v1:

 1:  2fb873ee80 =  1:  dcc0fa1906 meson: stop disabling -Wsign-compare
 2:  da4f0ce11e =  2:  059ee6afe1 reftable/record: drop unused `print` function pointer
 3:  6f242d9af0 !  3:  2a50096d14 reftable/record: handle overflows when decoding varints
    @@ Commit message
         implementation is basically copied from Git's own `decode_varint()`,
         which already knows to handle overflows. The only adjustment is that we
         also take into account the string view's length in order to not overrun
    -    it.
    +    it. The reftable documentation explicitly notes that those two encoding
    +    schemas are supposed to be the same:
    +
    +        Varint encoding
    +        ^^^^^^^^^^^^^^^
    +
    +        Varint encoding is identical to the ofs-delta encoding method used
    +        within pack files.
    +
    +        Decoder works as follows:
    +
    +        ....
    +        val = buf[ptr] & 0x7f
    +        while (buf[ptr] & 0x80) {
    +          ptr++
    +          val = ((val + 1) << 7) | (buf[ptr] & 0x7f)
    +        }
    +        ....
     
         While at it, refactor `put_var_int()` in the same way by copying over
         the implementation of `encode_varint()`. While `put_var_int()` doesn't
    @@ reftable/record.c: static void *reftable_record_data(struct reftable_record *rec
     +	val = c & 0x7f;
     +
     +	while (c & 0x80) {
    ++		/*
    ++		 * We use a micro-optimization here: whenever we see that the
    ++		 * 0x80 bit is set, we know that the remainder of the value
    ++		 * cannot be 0. The zero-values thus doesn't need to be encoded
    ++		 * at all, which is why we subtract 1 when encoding and add 1
    ++		 * when decoding.
    ++		 *
    ++		 * This allows us to save a byte in some edge cases.
    ++		 */
     +		val += 1;
     +		if (!val || (val & (uint64_t)(~0ULL << (64 - 7))))
     +			return -1; /* overflow */
    @@ reftable/record.c: static void *reftable_record_data(struct reftable_record *rec
     -		}
     -		val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f);
     +		c = *buf++;
    -+		val = (val << 7) | (c & 0x7f);
    ++		val = (val << 7) + (c & 0x7f);
      	}
      
      	*dest = val;
    @@ reftable/record.c: static void *reftable_record_data(struct reftable_record *rec
     -	if (dest->len < n)
     +	unsigned char varint[10];
     +	unsigned pos = sizeof(varint) - 1;
    -+	varint[pos] = value & 127;
    ++	varint[pos] = value & 0x7f;
     +	while (value >>= 7)
    -+		varint[--pos] = 128 | (--value & 127);
    ++		varint[--pos] = 0x80 | (--value & 0x7f);
     +	if (dest->len < sizeof(varint) - pos)
      		return -1;
     -	memcpy(dest->buf, &buf[i + 1], n);
    @@ reftable/record.c: static void *reftable_record_data(struct reftable_record *rec
     
      ## reftable/record.h ##
     @@ reftable/record.h: static inline void string_view_consume(struct string_view *s, int n)
    + 	s->len -= n;
    + }
      
    - /* utilities for de/encoding varints */
    - 
    +-/* utilities for de/encoding varints */
    +-
     +/*
     + * Decode and encode a varint. Returns the number of bytes read/written, or a
     + * negative value in case encoding/decoding the varint has failed.
 4:  e109babe15 !  4:  227002d330 reftable/basics: adjust `common_prefix_size()` to return `size_t`
    @@ reftable/record.c: int reftable_encode_key(int *restart, struct string_view dest
      	string_view_consume(&dest, n);
     
      ## reftable/writer.c ##
    +@@ reftable/writer.c: static int writer_finish_section(struct reftable_writer *w)
    + 
    + struct common_prefix_arg {
    + 	struct reftable_buf *last;
    +-	int max;
    ++	size_t max;
    + };
    + 
    + static void update_common(void *void_arg, void *key)
     @@ reftable/writer.c: static void update_common(void *void_arg, void *key)
      	struct common_prefix_arg *arg = void_arg;
      	struct obj_index_tree_node *entry = key;
 5:  bf9a568639 =  5:  63e709b44f reftable/basics: adjust `hash_size()` to return `uint32_t`
 6:  65b7137b90 =  6:  7f620bfeca reftable/block: adapt header and footer size to return a `size_t`
 7:  88748fd8a1 =  7:  e679e37e32 reftable/block: adjust type of the restart length
 8:  7d8c0eda59 =  8:  43d20a41e0 reftable/blocksource: adjust type of the block length
 9:  048be4c779 =  9:  48cccb5c00 reftable/blocksource: adjust `read_block()` to return `ssize_t`
10:  31b97b00f4 ! 10:  83f35b88d4 reftable: address trivial -Wsign-compare warnings
    @@ reftable/system.h: license that can be found in the LICENSE file or at
      #include "git-compat-util.h"
      
      /*
    -
    - ## reftable/writer.c ##
    -@@ reftable/writer.c: static int writer_finish_section(struct reftable_writer *w)
    - 
    - struct common_prefix_arg {
    - 	struct reftable_buf *last;
    --	int max;
    -+	size_t max;
    - };
    - 
    - static void update_common(void *void_arg, void *key)

---
base-commit: 757161efcca150a9a96b312d9e780a071e601a03
change-id: 20250116-b4-pks-reftable-sign-compare-8eaabae74c06