diff mbox series

[24/31] archive-tar: make hash size independent

Message ID 20190212012256.1005924-25-sandals@crustytoothpaste.net (mailing list archive)
State New, archived
Headers show
Series Hash function transition part 16 | expand

Commit Message

brian m. carlson Feb. 12, 2019, 1:22 a.m. UTC
Make the tar generation code hash size independent by using
the_hash_algo.  Make the tar parsing code compute the header value based
on the hash algorithm in use. Update a variable name and switch to
hash_to_hex.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 archive-tar.c               |  7 ++++---
 builtin/get-tar-commit-id.c | 11 +++++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

Comments

René Scharfe Feb. 12, 2019, 7:20 a.m. UTC | #1
Am 12.02.2019 um 02:22 schrieb brian m. carlson:
> diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c
> index 2706fcfaf2..2760549e91 100644
> --- a/builtin/get-tar-commit-id.c
> +++ b/builtin/get-tar-commit-id.c
> @@ -5,6 +5,7 @@
>  #include "commit.h"
>  #include "tar.h"
>  #include "builtin.h"
> +#include "strbuf.h"
>  #include "quote.h"
>
>  static const char builtin_get_tar_commit_id_usage[] =
> @@ -21,6 +22,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
>  	char *content = buffer + RECORDSIZE;
>  	const char *comment;
>  	ssize_t n;
> +	char *hdrprefix;
> +	int ret;
>
>  	if (argc != 1)
>  		usage(builtin_get_tar_commit_id_usage);
> @@ -32,10 +35,14 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
>  		die_errno("git get-tar-commit-id: EOF before reading tar header");
>  	if (header->typeflag[0] != 'g')
>  		return 1;
> -	if (!skip_prefix(content, "52 comment=", &comment))
> +
> +	hdrprefix = xstrfmt("%zu comment=", the_hash_algo->hexsz + strlen(" comment=") + 2 + 1);
> +	ret = skip_prefix(content, hdrprefix, &comment);
> +	free(hdrprefix);
> +	if (!ret)
>  		return 1;
>
> -	if (write_in_full(1, comment, 41) < 0)
> +	if (write_in_full(1, comment, the_hash_algo->hexsz + 1) < 0)
>  		die_errno("git get-tar-commit-id: write error");
>
>  	return 0;

That command currently prints the pax comment in tar archives if it
looks like a SHA1 hash based on its length.  It should continue to do
so, and _also_ show longer hashes.  Your change makes it _only_ show
those longer hashes.

So it could check for all known valid hash lengths in turn, or accept
any payload length between 40 and the_hash_algo->hexsz, or loosen up
totally and show comments of any length.

René
René Scharfe Feb. 12, 2019, 5:33 p.m. UTC | #2
Am 12.02.2019 um 08:20 schrieb René Scharfe:
> That command currently prints the pax comment in tar archives if it
> looks like a SHA1 hash based on its length.  It should continue to do
> so, and _also_ show longer hashes.  Your change makes it _only_ show
> those longer hashes.
>
> So it could check for all known valid hash lengths in turn, or accept
> any payload length between 40 and the_hash_algo->hexsz, or loosen up
> totally and show comments of any length.

How about the following patch, as a preparation?

-- >8 --
From: Rene Scharfe <l.s.r@web.de>
Subject: [PATCH] get-tar-commit-id: parse comment record

Parse pax comment records properly and get rid of magic numbers for
acceptable comment length.  This simplifies a later change to handle
longer hashes.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 builtin/get-tar-commit-id.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c
index 2706fcfaf2..312e44ed05 100644
--- a/builtin/get-tar-commit-id.c
+++ b/builtin/get-tar-commit-id.c
@@ -21,6 +21,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
 	char *content = buffer + RECORDSIZE;
 	const char *comment;
 	ssize_t n;
+	long len;
+	char *end;

 	if (argc != 1)
 		usage(builtin_get_tar_commit_id_usage);
@@ -32,10 +34,17 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
 		die_errno("git get-tar-commit-id: EOF before reading tar header");
 	if (header->typeflag[0] != 'g')
 		return 1;
-	if (!skip_prefix(content, "52 comment=", &comment))
+
+	len = strtol(content, &end, 10);
+	if (errno == ERANGE || end == content || len < 0)
+		return 1;
+	if (!skip_prefix(end, " comment=", &comment))
+		return 1;
+	len -= comment - content;
+	if (len != GIT_SHA1_HEXSZ + 1)
 		return 1;

-	if (write_in_full(1, comment, 41) < 0)
+	if (write_in_full(1, comment, len) < 0)
 		die_errno("git get-tar-commit-id: write error");

 	return 0;
--
2.20.1
brian m. carlson Feb. 13, 2019, 12:11 a.m. UTC | #3
On Tue, Feb 12, 2019 at 06:33:39PM +0100, René Scharfe wrote:
> Am 12.02.2019 um 08:20 schrieb René Scharfe:
> > That command currently prints the pax comment in tar archives if it
> > looks like a SHA1 hash based on its length.  It should continue to do
> > so, and _also_ show longer hashes.  Your change makes it _only_ show
> > those longer hashes.
> >
> > So it could check for all known valid hash lengths in turn, or accept
> > any payload length between 40 and the_hash_algo->hexsz, or loosen up
> > totally and show comments of any length.
> 
> How about the following patch, as a preparation?
> 
> -- >8 --
> From: Rene Scharfe <l.s.r@web.de>
> Subject: [PATCH] get-tar-commit-id: parse comment record
> 
> Parse pax comment records properly and get rid of magic numbers for
> acceptable comment length.  This simplifies a later change to handle
> longer hashes.
> 
> Signed-off-by: Rene Scharfe <l.s.r@web.de>
> ---
>  builtin/get-tar-commit-id.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c
> index 2706fcfaf2..312e44ed05 100644
> --- a/builtin/get-tar-commit-id.c
> +++ b/builtin/get-tar-commit-id.c
> @@ -21,6 +21,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
>  	char *content = buffer + RECORDSIZE;
>  	const char *comment;
>  	ssize_t n;
> +	long len;
> +	char *end;
> 
>  	if (argc != 1)
>  		usage(builtin_get_tar_commit_id_usage);
> @@ -32,10 +34,17 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
>  		die_errno("git get-tar-commit-id: EOF before reading tar header");
>  	if (header->typeflag[0] != 'g')
>  		return 1;
> -	if (!skip_prefix(content, "52 comment=", &comment))
> +
> +	len = strtol(content, &end, 10);
> +	if (errno == ERANGE || end == content || len < 0)
> +		return 1;
> +	if (!skip_prefix(end, " comment=", &comment))
> +		return 1;
> +	len -= comment - content;
> +	if (len != GIT_SHA1_HEXSZ + 1)
>  		return 1;

So it turns out I wrote a different patch for this in a later series,
but I like this style better. I'm going to squash my existing patches
together and then rework them such that they're based off yours.

My technique iterated over the entire header, comparing the entire
header, which is much less elegant than this solution.
diff mbox series

Patch

diff --git a/archive-tar.c b/archive-tar.c
index 4aabd566fb..a5ba55c11e 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -326,14 +326,15 @@  static int write_tar_entry(struct archiver_args *args,
 
 static void write_global_extended_header(struct archiver_args *args)
 {
-	const unsigned char *sha1 = args->commit_sha1;
+	const unsigned char *hash = args->commit_sha1;
 	struct strbuf ext_header = STRBUF_INIT;
 	struct ustar_header header;
 	unsigned int mode;
 
-	if (sha1)
+	if (hash)
 		strbuf_append_ext_header(&ext_header, "comment",
-					 sha1_to_hex(sha1), 40);
+					 hash_to_hex(hash),
+					 the_hash_algo->hexsz);
 	if (args->time > USTAR_MAX_MTIME) {
 		strbuf_append_ext_header_uint(&ext_header, "mtime",
 					      args->time);
diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c
index 2706fcfaf2..2760549e91 100644
--- a/builtin/get-tar-commit-id.c
+++ b/builtin/get-tar-commit-id.c
@@ -5,6 +5,7 @@ 
 #include "commit.h"
 #include "tar.h"
 #include "builtin.h"
+#include "strbuf.h"
 #include "quote.h"
 
 static const char builtin_get_tar_commit_id_usage[] =
@@ -21,6 +22,8 @@  int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
 	char *content = buffer + RECORDSIZE;
 	const char *comment;
 	ssize_t n;
+	char *hdrprefix;
+	int ret;
 
 	if (argc != 1)
 		usage(builtin_get_tar_commit_id_usage);
@@ -32,10 +35,14 @@  int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix)
 		die_errno("git get-tar-commit-id: EOF before reading tar header");
 	if (header->typeflag[0] != 'g')
 		return 1;
-	if (!skip_prefix(content, "52 comment=", &comment))
+
+	hdrprefix = xstrfmt("%zu comment=", the_hash_algo->hexsz + strlen(" comment=") + 2 + 1);
+	ret = skip_prefix(content, hdrprefix, &comment);
+	free(hdrprefix);
+	if (!ret)
 		return 1;
 
-	if (write_in_full(1, comment, 41) < 0)
+	if (write_in_full(1, comment, the_hash_algo->hexsz + 1) < 0)
 		die_errno("git get-tar-commit-id: write error");
 
 	return 0;