diff mbox series

[1/5] object-file: prefer array-of-bytes initializer for hash literals

Message ID 20241117090814.GA3409496@coredump.intra.peff.net (mailing list archive)
State New
Headers show
Series [1/5] object-file: prefer array-of-bytes initializer for hash literals | expand

Commit Message

Jeff King Nov. 17, 2024, 9:08 a.m. UTC
We hard-code a few well-known hash values for empty trees and blobs in
both sha1 and sha256 formats. We do so with string literals like this:

  #define EMPTY_TREE_SHA256_BIN_LITERAL \
         "\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
         "\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5" \
         "\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc" \
         "\x53\x21"

and then use it to initialize the hash field of an object_id struct.
That hash field is exactly 32 bytes long (the size we need for sha256).
But the string literal above is actually 33 bytes long due to the NUL
terminator. It's legal in C to initialize from a longer string literal;
the extra bytes are just ignored.

However, the upcoming gcc 15 will start warning about this:

      CC object-file.o
  object-file.c:52:9: warning: initializer-string for array of ‘unsigned char’ is too long [-Wunterminated-string-initialization]
     52 |         "\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
        |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  object-file.c:79:17: note: in expansion of macro ‘EMPTY_TREE_SHA256_BIN_LITERAL’

which is understandable. Even though this is not a bug for us, since we
do not care about the NUL terminator (and are just using the literal as
a convenient format), it would be easy to accidentally create an array
that was mistakenly unterminated.

We can avoid this warning by switching the initializer to an actual
array of unsigned values. That arguably demonstrates our intent more
clearly anyway.

Reported-by: Sam James <sam@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
---
I actually didn't find exact wording in the standard for using a
longer literal. But C99 section 6.7.8 (Initialization), para 32 shows
this exact case as "example 8".

You can view the diff with "--color-words --word-diff-regex=." to more
clearly see that the values themselves weren't changed.

 object-file.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

Comments

René Scharfe Nov. 17, 2024, 9:52 a.m. UTC | #1
Am 17.11.24 um 10:08 schrieb Jeff King:
> We hard-code a few well-known hash values for empty trees and blobs in
> both sha1 and sha256 formats. We do so with string literals like this:
>
>   #define EMPTY_TREE_SHA256_BIN_LITERAL \
>          "\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
>          "\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5" \
>          "\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc" \
>          "\x53\x21"
>
> and then use it to initialize the hash field of an object_id struct.
> That hash field is exactly 32 bytes long (the size we need for sha256).
> But the string literal above is actually 33 bytes long due to the NUL
> terminator. It's legal in C to initialize from a longer string literal;
> the extra bytes are just ignored.
>
> However, the upcoming gcc 15 will start warning about this:
>
>       CC object-file.o
>   object-file.c:52:9: warning: initializer-string for array of ‘unsigned char’ is too long [-Wunterminated-string-initialization]
>      52 |         "\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
>         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>   object-file.c:79:17: note: in expansion of macro ‘EMPTY_TREE_SHA256_BIN_LITERAL’
>
> which is understandable. Even though this is not a bug for us, since we
> do not care about the NUL terminator (and are just using the literal as
> a convenient format), it would be easy to accidentally create an array
> that was mistakenly unterminated.
>
> We can avoid this warning by switching the initializer to an actual
> array of unsigned values. That arguably demonstrates our intent more
> clearly anyway.

OK.

> Reported-by: Sam James <sam@gentoo.org>
> Signed-off-by: Jeff King <peff@peff.net>
> ---
> I actually didn't find exact wording in the standard for using a
> longer literal. But C99 section 6.7.8 (Initialization), para 32 shows
> this exact case as "example 8".
>
> You can view the diff with "--color-words --word-diff-regex=." to more
> clearly see that the values themselves weren't changed.
>
>  object-file.c | 38 +++++++++++++++++++++-----------------
>  1 file changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/object-file.c b/object-file.c
> index b1a3463852..25ba54594b 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -45,23 +45,27 @@
>  #define MAX_HEADER_LEN 32
>
>
> -#define EMPTY_TREE_SHA1_BIN_LITERAL \
> -	 "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
> -	 "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
> -#define EMPTY_TREE_SHA256_BIN_LITERAL \
> -	"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
> -	"\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5" \
> -	"\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc" \
> -	"\x53\x21"
> -
> -#define EMPTY_BLOB_SHA1_BIN_LITERAL \
> -	"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \
> -	"\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91"
> -#define EMPTY_BLOB_SHA256_BIN_LITERAL \
> -	"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2" \
> -	"\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43" \
> -	"\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72" \
> -	"\x18\x13"
> +#define EMPTY_TREE_SHA1_BIN_LITERAL { \
> +	 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,  \
> +	 0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04  \

The added space at the beginning looks seems unintended.

The two spaces before the backslash look odd.  One space, one tab or
lining up the backslashes with spaces would look better.

Patch 5 does away with those spaces, thankfully. :)

> +}
> +#define EMPTY_TREE_SHA256_BIN_LITERAL { \
> +	0x6e, 0xf1, 0x9b, 0x41, 0x22, 0x5c, 0x53, 0x69, 0xf1, 0xc1,  \
> +	0x04, 0xd4, 0x5d, 0x8d, 0x85, 0xef, 0xa9, 0xb0, 0x57, 0xb5,  \
> +	0x3b, 0x14, 0xb4, 0xb9, 0xb9, 0x39, 0xdd, 0x74, 0xde, 0xcc,  \
> +	0x53, 0x21 \
> +}
> +
> +#define EMPTY_BLOB_SHA1_BIN_LITERAL { \
> +	0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b,  \
> +	0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91  \
> +}
> +#define EMPTY_BLOB_SHA256_BIN_LITERAL { \
> +	0x47, 0x3a, 0x0f, 0x4c, 0x3b, 0xe8, 0xa9, 0x36, 0x81, 0xa2,  \
> +	0x67, 0xe3, 0xb1, 0xe9, 0xa7, 0xdc, 0xda, 0x11, 0x85, 0x43,  \
> +	0x6f, 0xe1, 0x41, 0xf7, 0x74, 0x91, 0x20, 0xa3, 0x03, 0x72,  \
> +	0x18, 0x13 \
> +}
>
>  static const struct object_id empty_tree_oid = {
>  	.hash = EMPTY_TREE_SHA1_BIN_LITERAL,
Jeff King Nov. 18, 2024, 9:06 a.m. UTC | #2
On Sun, Nov 17, 2024 at 10:52:40AM +0100, René Scharfe wrote:

> > -#define EMPTY_BLOB_SHA256_BIN_LITERAL \
> > -	"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2" \
> > -	"\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43" \
> > -	"\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72" \
> > -	"\x18\x13"
> > +#define EMPTY_TREE_SHA1_BIN_LITERAL { \
> > +	 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,  \
> > +	 0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04  \
> 
> The added space at the beginning looks seems unintended.

That was from the original, which had a tab followed by a space (maybe
to line up with the "E" in "EMPTY"?). I did s/"// and s/\\x\(..\)/0x\1, /
which left it.

> The two spaces before the backslash look odd.  One space, one tab or
> lining up the backslashes with spaces would look better.

This one is my fault, though. My regex left an extra comma at the end,
which I somehow managed to bungle removing. ;)

> Patch 5 does away with those spaces, thankfully. :)

Yep. Looks like there might be some whitespace oddities left over,
though, so I'll fix this on re-roll.

Thanks.

-Peff
diff mbox series

Patch

diff --git a/object-file.c b/object-file.c
index b1a3463852..25ba54594b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -45,23 +45,27 @@ 
 #define MAX_HEADER_LEN 32
 
 
-#define EMPTY_TREE_SHA1_BIN_LITERAL \
-	 "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
-	 "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
-#define EMPTY_TREE_SHA256_BIN_LITERAL \
-	"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
-	"\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5" \
-	"\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc" \
-	"\x53\x21"
-
-#define EMPTY_BLOB_SHA1_BIN_LITERAL \
-	"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \
-	"\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91"
-#define EMPTY_BLOB_SHA256_BIN_LITERAL \
-	"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2" \
-	"\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43" \
-	"\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72" \
-	"\x18\x13"
+#define EMPTY_TREE_SHA1_BIN_LITERAL { \
+	 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,  \
+	 0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04  \
+}
+#define EMPTY_TREE_SHA256_BIN_LITERAL { \
+	0x6e, 0xf1, 0x9b, 0x41, 0x22, 0x5c, 0x53, 0x69, 0xf1, 0xc1,  \
+	0x04, 0xd4, 0x5d, 0x8d, 0x85, 0xef, 0xa9, 0xb0, 0x57, 0xb5,  \
+	0x3b, 0x14, 0xb4, 0xb9, 0xb9, 0x39, 0xdd, 0x74, 0xde, 0xcc,  \
+	0x53, 0x21 \
+}
+
+#define EMPTY_BLOB_SHA1_BIN_LITERAL { \
+	0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b,  \
+	0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91  \
+}
+#define EMPTY_BLOB_SHA256_BIN_LITERAL { \
+	0x47, 0x3a, 0x0f, 0x4c, 0x3b, 0xe8, 0xa9, 0x36, 0x81, 0xa2,  \
+	0x67, 0xe3, 0xb1, 0xe9, 0xa7, 0xdc, 0xda, 0x11, 0x85, 0x43,  \
+	0x6f, 0xe1, 0x41, 0xf7, 0x74, 0x91, 0x20, 0xa3, 0x03, 0x72,  \
+	0x18, 0x13 \
+}
 
 static const struct object_id empty_tree_oid = {
 	.hash = EMPTY_TREE_SHA1_BIN_LITERAL,