diff mbox series

[1/3] csum-file: add nested_hashfile()

Message ID 0eca529766fcbe70147bc28ce7f0692e645fd118.1616785928.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Convert index writes to use hashfile API | expand

Commit Message

Derrick Stolee March 26, 2021, 7:12 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The index writing code in do_write_index() uses a custom set of hashing
code, in part because it was introduced before the hashfile API. But
also, the End of Index Entries extension computes a hash of just the
extension data, not the entire file preceding that extension.

Before converting the index writing code to use the hashfile API, create
a concept of a "nested hashfile". By adding a 'base' member to 'struct
hashfile', we indicate that any writes to this hashfile should be passed
along to the base hashfile, too.

In the next change, the index code will use this to create a new
hashfile whose base is the hashfile for the index. The outer hashfile
will compute the hash just for the extension details. Thus, it will
finalize earlier than the base hashfile, hence there is no modification
to finalize_hashfile() here.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 csum-file.c | 22 ++++++++++++++++++++++
 csum-file.h |  9 +++++++++
 2 files changed, 31 insertions(+)
diff mbox series

Patch

diff --git a/csum-file.c b/csum-file.c
index 0f35fa5ee47c..e73b35316e66 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -13,6 +13,9 @@ 
 
 static void flush(struct hashfile *f, const void *buf, unsigned int count)
 {
+	if (f->base)
+		return;
+
 	if (0 <= f->check_fd && count)  {
 		unsigned char check_buffer[8192];
 		ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
@@ -116,6 +119,9 @@  void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 		}
 		f->offset = offset;
 	}
+
+	if (f->base)
+		hashwrite(f->base, buf, count);
 }
 
 struct hashfile *hashfd(int fd, const char *name)
@@ -150,6 +156,7 @@  struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
 	f->name = name;
 	f->do_crc = 0;
 	the_hash_algo->init_fn(&f->ctx);
+	f->base = NULL;
 	return f;
 }
 
@@ -184,3 +191,18 @@  uint32_t crc32_end(struct hashfile *f)
 	f->do_crc = 0;
 	return f->crc32;
 }
+
+struct hashfile *nested_hashfile(struct hashfile *f) /* allocate a hashfile whose base is f */
+{
+	struct hashfile *n = xmalloc(sizeof(*f)); /* n and f are both struct hashfile, so the size matches */
+	n->fd = -1; /* no descriptor of its own: flush() returns early when base is set */
+	n->check_fd = -1;
+	n->offset = 0;
+	n->total = 0;
+	n->tp = NULL;
+	n->name = NULL;
+	n->do_crc = 0;
+	the_hash_algo->init_fn(&n->ctx); /* set up n's own hash context, separate from f's */
+	n->base = f; /* hashwrite() on n also calls hashwrite() on f with the same bytes */
+	return n;
+}
diff --git a/csum-file.h b/csum-file.h
index e54d53d1d0b3..b8785e7ecb46 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -16,6 +16,7 @@  struct hashfile {
 	const char *name;
 	int do_crc;
 	uint32_t crc32;
+	struct hashfile *base;
 	unsigned char buffer[8192];
 };
 
@@ -42,6 +43,14 @@  void hashflush(struct hashfile *f);
 void crc32_begin(struct hashfile *);
 uint32_t crc32_end(struct hashfile *);
 
+/*
+ * A nested hashfile uses the same interface as a hashfile, and computes
+ * a hash for the input bytes while passing them to the base hashfile
+ * instead of writing them to its own file. This is useful for computing
+ * a hash of a region within a file during the write.
+ */
+struct hashfile *nested_hashfile(struct hashfile *f);
+
 /*
  * Returns the total number of bytes fed to the hashfile so far (including ones
  * that have not been written out to the descriptor yet).