diff mbox series

[12/31] fast-import: make hash-size independent

Message ID 20190212012256.1005924-13-sandals@crustytoothpaste.net (mailing list archive)
State New, archived
Headers show
Series Hash function transition part 16 | expand

Commit Message

brian m. carlson Feb. 12, 2019, 1:22 a.m. UTC
Replace several uses of GIT_SHA1_HEXSZ and 40-based constants with
references to the_hash_algo.  Update the note handling code here to
compute path sizes based on GIT_MAX_RAWSZ as well.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 fast-import.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

Comments

Eric Sunshine Feb. 12, 2019, 3:44 a.m. UTC | #1
On Mon, Feb 11, 2019 at 8:23 PM brian m. carlson
<sandals@crustytoothpaste.net> wrote:
> Replace several uses of GIT_SHA1_HEXSZ and 40-based constants with
> references to the_hash_algo.  Update the note handling code here to
> compute path sizes based on GIT_MAX_RAWSZ as well.
>
> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---
> diff --git a/fast-import.c b/fast-import.c
> @@ -2047,7 +2047,8 @@ static uintmax_t do_change_note_fanout(
> -       char realpath[60];
> +       char realpath[GIT_MAX_RAWSZ * 3];

I wonder if the fixed multiplier deserves a comment explaining that
this is reserving enough space for a hex representation with '/'
between each digit pair plus NUL. Which leads to the next question: Is
there is GIT_MAX_HEXSZ constant? If so, this might be more clearly
represented (or not) by taking advantage of that value.

Also, there are a number of hardcoded 60's in the code earlier in this
file, such as:

    if ((max_packsize && (pack_size + 60 + len) > max_packsize)
        || (pack_size + 60 + len) < pack_size)
        cycle_packfile();

Is that just a coincidence or is it related to the 60 characters
allocated for 'realpath'?

> @@ -2405,7 +2406,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
>                 char *buf = read_object_with_reference(&commit_oid,
>                                                        commit_type, &size,
>                                                        &commit_oid);
> -               if (!buf || size < 46)
> +               if (!buf || size < the_hash_algo->hexsz)

What exactly did the 46 represent and how does it relate to 'hexsz'?
Stated differently, why didn't this become:

    the_hash_algo->hexsz + 6'

?

> @@ -2456,7 +2457,7 @@ static void file_change_deleteall(struct branch *b)
>  static void parse_from_commit(struct branch *b, char *buf, unsigned long size)
>  {
> -       if (!buf || size < GIT_SHA1_HEXSZ + 6)
> +       if (!buf || size < the_hash_algo->hexsz + 6)

...as it seems to have here...

> @@ -2555,7 +2556,7 @@ static struct hash_list *parse_merge(unsigned int *count)
> -                       if (!buf || size < 46)
> +                       if (!buf || size < the_hash_algo->hexsz)

...but not here.
brian m. carlson Feb. 12, 2019, 11:36 p.m. UTC | #2
On Mon, Feb 11, 2019 at 10:44:12PM -0500, Eric Sunshine wrote:
> On Mon, Feb 11, 2019 at 8:23 PM brian m. carlson
> <sandals@crustytoothpaste.net> wrote:
> > Replace several uses of GIT_SHA1_HEXSZ and 40-based constants with
> > references to the_hash_algo.  Update the note handling code here to
> > compute path sizes based on GIT_MAX_RAWSZ as well.
> >
> > Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> > ---
> > diff --git a/fast-import.c b/fast-import.c
> > @@ -2047,7 +2047,8 @@ static uintmax_t do_change_note_fanout(
> > -       char realpath[60];
> > +       char realpath[GIT_MAX_RAWSZ * 3];
> 
> I wonder if the fixed multiplier deserves a comment explaining that
> this is reserving enough space for a hex representation with '/'
> between each digit pair plus NUL. Which leads to the next question: Is
> there is GIT_MAX_HEXSZ constant? If so, this might be more clearly
> represented (or not) by taking advantage of that value.

There is such a constant. I'll add a comment and see if I can write it
in a way that makes it more intuitive what we're computing.

> Also, there are a number of hardcoded 60's in the code earlier in this
> file, such as:
> 
>     if ((max_packsize && (pack_size + 60 + len) > max_packsize)
>         || (pack_size + 60 + len) < pack_size)
>         cycle_packfile();
> 
> Is that just a coincidence or is it related to the 60 characters
> allocated for 'realpath'?

That's an interesting question. It looks like these are indeed related
to the hash size, but they're not related to the realpath call above.
I'll convert them as well.

> > @@ -2405,7 +2406,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
> >                 char *buf = read_object_with_reference(&commit_oid,
> >                                                        commit_type, &size,
> >                                                        &commit_oid);
> > -               if (!buf || size < 46)
> > +               if (!buf || size < the_hash_algo->hexsz)
> 
> What exactly did the 46 represent and how does it relate to 'hexsz'?
> Stated differently, why didn't this become:
> 
>     the_hash_algo->hexsz + 6'
> 
> ?

Good catch. I believe that 46 is the number of bytes in "tree %s\n",
which is the smallest possible commit (a root commit without an author
or committer).

I'll fix this misconversion.
diff mbox series

Patch

diff --git a/fast-import.c b/fast-import.c
index 7c9a10a77b..eba8c8c919 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1241,7 +1241,7 @@  static void load_tree(struct tree_entry *root)
 		c += e->name->str_len + 1;
 		hashcpy(e->versions[0].oid.hash, (unsigned char *)c);
 		hashcpy(e->versions[1].oid.hash, (unsigned char *)c);
-		c += GIT_SHA1_RAWSZ;
+		c += the_hash_algo->rawsz;
 	}
 	free(buf);
 }
@@ -1288,7 +1288,7 @@  static void mktree(struct tree_content *t, int v, struct strbuf *b)
 		strbuf_addf(b, "%o %s%c",
 			(unsigned int)(e->versions[v].mode & ~NO_DELTA),
 			e->name->str_dat, '\0');
-		strbuf_add(b, e->versions[v].oid.hash, GIT_SHA1_RAWSZ);
+		strbuf_add(b, e->versions[v].oid.hash, the_hash_algo->rawsz);
 	}
 }
 
@@ -2047,7 +2047,8 @@  static uintmax_t do_change_note_fanout(
 	unsigned int i, tmp_hex_oid_len, tmp_fullpath_len;
 	uintmax_t num_notes = 0;
 	struct object_id oid;
-	char realpath[60];
+	char realpath[GIT_MAX_RAWSZ * 3];
+	const unsigned hexsz = the_hash_algo->hexsz;
 
 	if (!root->tree)
 		load_tree(root);
@@ -2067,7 +2068,7 @@  static uintmax_t do_change_note_fanout(
 		 * of 2 chars.
 		 */
 		if (!e->versions[1].mode ||
-		    tmp_hex_oid_len > GIT_SHA1_HEXSZ ||
+		    tmp_hex_oid_len > hexsz ||
 		    e->name->str_len % 2)
 			continue;
 
@@ -2081,7 +2082,7 @@  static uintmax_t do_change_note_fanout(
 		tmp_fullpath_len += e->name->str_len;
 		fullpath[tmp_fullpath_len] = '\0';
 
-		if (tmp_hex_oid_len == GIT_SHA1_HEXSZ && !get_oid_hex(hex_oid, &oid)) {
+		if (tmp_hex_oid_len == hexsz && !get_oid_hex(hex_oid, &oid)) {
 			/* This is a note entry */
 			if (fanout == 0xff) {
 				/* Counting mode, no rename */
@@ -2352,7 +2353,7 @@  static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 	struct object_entry *oe;
 	struct branch *s;
 	struct object_id oid, commit_oid;
-	char path[60];
+	char path[GIT_MAX_RAWSZ * 3];
 	uint16_t inline_data = 0;
 	unsigned char new_fanout;
 
@@ -2405,7 +2406,7 @@  static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 		char *buf = read_object_with_reference(&commit_oid,
 						       commit_type, &size,
 						       &commit_oid);
-		if (!buf || size < 46)
+		if (!buf || size < the_hash_algo->hexsz)
 			die("Not a valid commit: %s", p);
 		free(buf);
 	} else
@@ -2456,7 +2457,7 @@  static void file_change_deleteall(struct branch *b)
 
 static void parse_from_commit(struct branch *b, char *buf, unsigned long size)
 {
-	if (!buf || size < GIT_SHA1_HEXSZ + 6)
+	if (!buf || size < the_hash_algo->hexsz + 6)
 		die("Not a valid commit: %s", oid_to_hex(&b->oid));
 	if (memcmp("tree ", buf, 5)
 		|| get_oid_hex(buf + 5, &b->branch_tree.versions[1].oid))
@@ -2555,7 +2556,7 @@  static struct hash_list *parse_merge(unsigned int *count)
 			char *buf = read_object_with_reference(&n->oid,
 							       commit_type,
 							       &size, &n->oid);
-			if (!buf || size < 46)
+			if (!buf || size < the_hash_algo->hexsz)
 				die("Not a valid commit: %s", from);
 			free(buf);
 		} else
@@ -2842,7 +2843,7 @@  static void parse_get_mark(const char *p)
 		die("Unknown mark: %s", command_buf.buf);
 
 	xsnprintf(output, sizeof(output), "%s\n", oid_to_hex(&oe->idx.oid));
-	cat_blob_write(output, GIT_SHA1_HEXSZ + 1);
+	cat_blob_write(output, the_hash_algo->hexsz + 1);
 }
 
 static void parse_cat_blob(const char *p)
@@ -2872,6 +2873,8 @@  static struct object_entry *dereference(struct object_entry *oe,
 {
 	unsigned long size;
 	char *buf = NULL;
+	const unsigned hexsz = the_hash_algo->hexsz;
+
 	if (!oe) {
 		enum object_type type = oid_object_info(the_repository, oid,
 							NULL);
@@ -2905,12 +2908,12 @@  static struct object_entry *dereference(struct object_entry *oe,
 	/* Peel one layer. */
 	switch (oe->type) {
 	case OBJ_TAG:
-		if (size < GIT_SHA1_HEXSZ + strlen("object ") ||
+		if (size < hexsz + strlen("object ") ||
 		    get_oid_hex(buf + strlen("object "), oid))
 			die("Invalid SHA1 in tag: %s", command_buf.buf);
 		break;
 	case OBJ_COMMIT:
-		if (size < GIT_SHA1_HEXSZ + strlen("tree ") ||
+		if (size < hexsz + strlen("tree ") ||
 		    get_oid_hex(buf + strlen("tree "), oid))
 			die("Invalid SHA1 in commit: %s", command_buf.buf);
 	}