Message ID | 20190212012256.1005924-25-sandals@crustytoothpaste.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Hash function transition part 16 | expand |
Am 12.02.2019 um 02:22 schrieb brian m. carlson: > diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c > index 2706fcfaf2..2760549e91 100644 > --- a/builtin/get-tar-commit-id.c > +++ b/builtin/get-tar-commit-id.c > @@ -5,6 +5,7 @@ > #include "commit.h" > #include "tar.h" > #include "builtin.h" > +#include "strbuf.h" > #include "quote.h" > > static const char builtin_get_tar_commit_id_usage[] = > @@ -21,6 +22,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) > char *content = buffer + RECORDSIZE; > const char *comment; > ssize_t n; > + char *hdrprefix; > + int ret; > > if (argc != 1) > usage(builtin_get_tar_commit_id_usage); > @@ -32,10 +35,14 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) > die_errno("git get-tar-commit-id: EOF before reading tar header"); > if (header->typeflag[0] != 'g') > return 1; > - if (!skip_prefix(content, "52 comment=", &comment)) > + > + hdrprefix = xstrfmt("%zu comment=", the_hash_algo->hexsz + strlen(" comment=") + 2 + 1); > + ret = skip_prefix(content, hdrprefix, &comment); > + free(hdrprefix); > + if (!ret) > return 1; > > - if (write_in_full(1, comment, 41) < 0) > + if (write_in_full(1, comment, the_hash_algo->hexsz + 1) < 0) > die_errno("git get-tar-commit-id: write error"); > > return 0; That command currently prints the pax comment in tar archives if it looks like a SHA1 hash based on its length. It should continue to do so, and _also_ show longer hashes. Your change makes it _only_ show those longer hashes. So it could check for all known valid hash lengths in turn, or accept any payload length between 40 and the_hash_algo->hexsz, or loosen up totally and show comments of any length. René
Am 12.02.2019 um 08:20 schrieb René Scharfe: > That command currently prints the pax comment in tar archives if it > looks like a SHA1 hash based on its length. It should continue to do > so, and _also_ show longer hashes. Your change makes it _only_ show > those longer hashes. > > So it could check for all known valid hash lengths in turn, or accept > any payload length between 40 and the_hash_algo->hexsz, or loosen up > totally and show comments of any length. How about the following patch, as a preparation? -- >8 -- From: Rene Scharfe <l.s.r@web.de> Subject: [PATCH] get-tar-commit-id: parse comment record Parse pax comment records properly and get rid of magic numbers for acceptable comment length. This simplifies a later change to handle longer hashes. Signed-off-by: Rene Scharfe <l.s.r@web.de> --- builtin/get-tar-commit-id.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 2706fcfaf2..312e44ed05 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -21,6 +21,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) char *content = buffer + RECORDSIZE; const char *comment; ssize_t n; + long len; + char *end; if (argc != 1) usage(builtin_get_tar_commit_id_usage); @@ -32,10 +34,17 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) die_errno("git get-tar-commit-id: EOF before reading tar header"); if (header->typeflag[0] != 'g') return 1; - if (!skip_prefix(content, "52 comment=", &comment)) + + len = strtol(content, &end, 10); + if (errno == ERANGE || end == content || len < 0) + return 1; + if (!skip_prefix(end, " comment=", &comment)) + return 1; + len -= comment - content; + if (len != GIT_SHA1_HEXSZ + 1) return 1; - if (write_in_full(1, comment, 41) < 0) + if (write_in_full(1, comment, len) < 0) die_errno("git get-tar-commit-id: write error"); return 0; -- 2.20.1
On Tue, Feb 12, 2019 at 06:33:39PM +0100, René Scharfe wrote: > Am 12.02.2019 um 08:20 schrieb René Scharfe: > > That command currently prints the pax comment in tar archives if it > > looks like a SHA1 hash based on its length. It should continue to do > > so, and _also_ show longer hashes. Your change makes it _only_ show > > those longer hashes. > > > > So it could check for all known valid hash lengths in turn, or accept > > any payload length between 40 and the_hash_algo->hexsz, or loosen up > > totally and show comments of any length. > > How about the following patch, as a preparation? > > -- >8 -- > From: Rene Scharfe <l.s.r@web.de> > Subject: [PATCH] get-tar-commit-id: parse comment record > > Parse pax comment records properly and get rid of magic numbers for > acceptable comment length. This simplifies a later change to handle > longer hashes. > > Signed-off-by: Rene Scharfe <l.s.r@web.de> > --- > builtin/get-tar-commit-id.c | 13 +++++++++++-- > 1 file changed, 11 insertions(+), 2 deletions(-) > > diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c > index 2706fcfaf2..312e44ed05 100644 > --- a/builtin/get-tar-commit-id.c > +++ b/builtin/get-tar-commit-id.c > @@ -21,6 +21,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) > char *content = buffer + RECORDSIZE; > const char *comment; > ssize_t n; > + long len; > + char *end; > > if (argc != 1) > usage(builtin_get_tar_commit_id_usage); > @@ -32,10 +34,17 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) > die_errno("git get-tar-commit-id: EOF before reading tar header"); > if (header->typeflag[0] != 'g') > return 1; > - if (!skip_prefix(content, "52 comment=", &comment)) > + > + len = strtol(content, &end, 10); > + if (errno == ERANGE || end == content || len < 0) > + return 1; > + if (!skip_prefix(end, " comment=", &comment)) > + return 1; > + len -= comment - content; > + if (len != GIT_SHA1_HEXSZ + 1) > return 1; So it turns out I wrote a different patch for this in a later series, but I like this style better. I'm going to squash my existing patches together and then rework them such that they're based off yours. My technique iterated over the entire header, comparing the entire header, which is much less elegant than this solution.
diff --git a/archive-tar.c b/archive-tar.c index 4aabd566fb..a5ba55c11e 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -326,14 +326,15 @@ static int write_tar_entry(struct archiver_args *args, static void write_global_extended_header(struct archiver_args *args) { - const unsigned char *sha1 = args->commit_sha1; + const unsigned char *hash = args->commit_sha1; struct strbuf ext_header = STRBUF_INIT; struct ustar_header header; unsigned int mode; - if (sha1) + if (hash) strbuf_append_ext_header(&ext_header, "comment", - sha1_to_hex(sha1), 40); + hash_to_hex(hash), + the_hash_algo->hexsz); if (args->time > USTAR_MAX_MTIME) { strbuf_append_ext_header_uint(&ext_header, "mtime", args->time); diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 2706fcfaf2..2760549e91 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -5,6 +5,7 @@ #include "commit.h" #include "tar.h" #include "builtin.h" +#include "strbuf.h" #include "quote.h" static const char builtin_get_tar_commit_id_usage[] = @@ -21,6 +22,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) char *content = buffer + RECORDSIZE; const char *comment; ssize_t n; + char *hdrprefix; + int ret; if (argc != 1) usage(builtin_get_tar_commit_id_usage); @@ -32,10 +35,14 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) die_errno("git get-tar-commit-id: EOF before reading tar header"); if (header->typeflag[0] != 'g') return 1; - if (!skip_prefix(content, "52 comment=", &comment)) + + hdrprefix = xstrfmt("%zu comment=", the_hash_algo->hexsz + strlen(" comment=") + 2 + 1); + ret = skip_prefix(content, hdrprefix, &comment); + free(hdrprefix); + if (!ret) return 1; - if (write_in_full(1, comment, 41) < 0) + if (write_in_full(1, comment, the_hash_algo->hexsz + 1) < 0) die_errno("git get-tar-commit-id: write error"); return 0;
Make the tar generation code hash size independent by using the_hash_algo. Make the tar parsing code compute the header value based on the hash algorithm in use. Update a variable name and switch to hash_to_hex. Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> --- archive-tar.c | 7 ++++--- builtin/get-tar-commit-id.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-)