diff mbox series

[28/31] dir: make untracked cache extension hash size independent

Message ID 20190212012256.1005924-29-sandals@crustytoothpaste.net (mailing list archive)
State New, archived
Headers show
Series Hash function transition part 16 | expand

Commit Message

brian m. carlson Feb. 12, 2019, 1:22 a.m. UTC
Instead of using a struct with a flex array member to read and write the
untracked cache extension, use a shorter, fixed-length struct and add
the name and hash data explicitly.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 dir.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Feb. 12, 2019, 11:08 a.m. UTC | #1
On Tue, Feb 12 2019, brian m. carlson wrote:

> Instead of using a struct with a flex array member to read and write the
> untracked cache extension, use a shorter, fixed-length struct and add
> the name and hash data explicitly.
> [...]
>  	struct stat_data info_exclude_stat;
>  	struct stat_data excludes_file_stat;
>  	uint32_t dir_flags;
> -	unsigned char info_exclude_sha1[20];
> -	unsigned char excludes_file_sha1[20];
> -	char exclude_per_dir[FLEX_ARRAY];
>  };

Both this & the follow-up 29/31 look scary since this is an on-disk
structure and this patch & the next one rather than implementing some
transition, just changes the structs & code we use to read & write to
use the current hash size.

So what are we going to do when the "current" size is SHA-256 and our
on-disk cache is SHA-1? It seems like with this we'd at best (I haven't
tested) throw away the SHA-1 UC data, and at worst introduce some silent
persistent read failure.

In any case that seems like something we should have tests for with an
on-disk format, i.e. write in one hash, see what happens when we read in
another, and perhaps instead of not understanding SHA-1 hashes in
SHA-256 mode we'd read the SHA-1 and consult the SHA-1<->SHA-256 lookup
table we're going to be eventually maintaining on the side?
brian m. carlson Feb. 13, 2019, 12:30 a.m. UTC | #2
On Tue, Feb 12, 2019 at 12:08:01PM +0100, Ævar Arnfjörð Bjarmason wrote:
> Both this & the follow-up 29/31 look scary since this is an on-disk
> structure and this patch & the next one rather than implementing some
> transition, just changes the structs & code we use to read & write to
> use the current hash size.
> 
> So what are we going to do when the "current" size is SHA-256 and our
> on-disk cache is SHA-1? It seems like with this we'd at best (I haven't
> tested) throw away the SHA-1 UC data, and at worst introduce some silent
> persistent read failure.

That's a good question. The current design has everything in the .git
directory other than the loose object table and the pack indices in one
algorithm. That is, the untracked cache extension, like the rest of the
index, will always be SHA-256 when the objects are stored in SHA-256.

Having mixed algorithms in the .git directory would add a lot of
complexity.

> In any case that seems like something we should have tests for with an
> on-disk format, i.e. write in one hash, see what happens when we read in
> another, and perhaps instead of not understanding SHA-1 hashes in
> SHA-256 mode we'd read the SHA-1 and consult the SHA-1<->SHA-256 lookup
> table we're going to be eventually maintaining on the side?

Correct. The current plan is that when the transition code is fully
implemented, we'll read object IDs from the user in whatever algorithms
we support, translate them into the default algorithm, and then use
them.
diff mbox series

Patch

diff --git a/dir.c b/dir.c
index b2cabadf25..7503b56918 100644
--- a/dir.c
+++ b/dir.c
@@ -2545,13 +2545,9 @@  struct ondisk_untracked_cache {
 	struct stat_data info_exclude_stat;
 	struct stat_data excludes_file_stat;
 	uint32_t dir_flags;
-	unsigned char info_exclude_sha1[20];
-	unsigned char excludes_file_sha1[20];
-	char exclude_per_dir[FLEX_ARRAY];
 };
 
 #define ouc_offset(x) offsetof(struct ondisk_untracked_cache, x)
-#define ouc_size(len) (ouc_offset(exclude_per_dir) + len + 1)
 
 struct write_data {
 	int index;	   /* number of written untracked_cache_dir */
@@ -2634,20 +2630,21 @@  void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	struct write_data wd;
 	unsigned char varbuf[16];
 	int varint_len;
-	size_t len = strlen(untracked->exclude_per_dir);
+	const unsigned hashsz = the_hash_algo->rawsz;
 
-	FLEX_ALLOC_MEM(ouc, exclude_per_dir, untracked->exclude_per_dir, len);
+	ouc = xcalloc(1, sizeof(*ouc));
 	stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
 	stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
-	hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.oid.hash);
-	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.oid.hash);
 	ouc->dir_flags = htonl(untracked->dir_flags);
 
 	varint_len = encode_varint(untracked->ident.len, varbuf);
 	strbuf_add(out, varbuf, varint_len);
 	strbuf_addbuf(out, &untracked->ident);
 
-	strbuf_add(out, ouc, ouc_size(len));
+	strbuf_add(out, ouc, sizeof(*ouc));
+	strbuf_add(out, untracked->ss_info_exclude.oid.hash, hashsz);
+	strbuf_add(out, untracked->ss_excludes_file.oid.hash, hashsz);
+	strbuf_add(out, untracked->exclude_per_dir, strlen(untracked->exclude_per_dir) + 1);
 	FREE_AND_NULL(ouc);
 
 	if (!untracked->root) {
@@ -2834,6 +2831,9 @@  struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	int ident_len;
 	ssize_t len;
 	const char *exclude_per_dir;
+	const unsigned hashsz = the_hash_algo->rawsz;
+	const unsigned offset = sizeof(struct ondisk_untracked_cache);
+	const unsigned exclude_per_dir_offset = offset + 2 * hashsz;
 
 	if (sz <= 1 || end[-1] != '\0')
 		return NULL;
@@ -2845,7 +2845,7 @@  struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	ident = (const char *)next;
 	next += ident_len;
 
-	if (next + ouc_size(0) > end)
+	if (next + exclude_per_dir_offset + 1 > end)
 		return NULL;
 
 	uc = xcalloc(1, sizeof(*uc));
@@ -2853,15 +2853,15 @@  struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	strbuf_add(&uc->ident, ident, ident_len);
 	load_oid_stat(&uc->ss_info_exclude,
 		      next + ouc_offset(info_exclude_stat),
-		      next + ouc_offset(info_exclude_sha1));
+		      next + offset);
 	load_oid_stat(&uc->ss_excludes_file,
 		      next + ouc_offset(excludes_file_stat),
-		      next + ouc_offset(excludes_file_sha1));
+		      next + offset + hashsz);
 	uc->dir_flags = get_be32(next + ouc_offset(dir_flags));
-	exclude_per_dir = (const char *)next + ouc_offset(exclude_per_dir);
+	exclude_per_dir = (const char *)next + exclude_per_dir_offset;
 	uc->exclude_per_dir = xstrdup(exclude_per_dir);
 	/* NUL after exclude_per_dir is covered by sizeof(*ouc) */
-	next += ouc_size(strlen(exclude_per_dir));
+	next += exclude_per_dir_offset + strlen(exclude_per_dir) + 1;
 	if (next >= end)
 		goto done2;