[1/3] csum-file: add nested_hashfile()

Message ID	0eca529766fcbe70147bc28ce7f0692e645fd118.1616785928.git.gitgitgadget@gmail.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <git-owner@kernel.org> Message-Id: <0eca529766fcbe70147bc28ce7f0692e645fd118.1616785928.git.gitgitgadget@gmail.com> In-Reply-To: <pull.916.git.1616785928.gitgitgadget@gmail.com> References: <pull.916.git.1616785928.gitgitgadget@gmail.com> Date: Fri, 26 Mar 2021 19:12:05 +0000 Subject: [PATCH 1/3] csum-file: add nested_hashfile() Fcc: Sent Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIME-Version: 1.0 To: git@vger.kernel.org Cc: gitster@pobox.com, peff@peff.net, stolee@gmail.com, git@jeffhostetler.com, Derrick Stolee <derrickstolee@github.com>, Derrick Stolee <dstolee@microsoft.com> Precedence: bulk From: Derrick Stolee <dstolee@microsoft.com>
Series	Convert index writes to use hashfile API \| expand [0/3] Convert index writes to use hashfile API [1/3] csum-file: add nested_hashfile() [2/3] read-cache: use hashfile instead of git_hash_ctx [3/3] read-cache: delete unused hashing methods

Message ID

0eca529766fcbe70147bc28ce7f0692e645fd118.1616785928.git.gitgitgadget@gmail.com (mailing list archive)

State

New, archived

Headers

Message-Id: 
 <0eca529766fcbe70147bc28ce7f0692e645fd118.1616785928.git.gitgitgadget@gmail.com>
In-Reply-To: <pull.916.git.1616785928.gitgitgadget@gmail.com>
References: <pull.916.git.1616785928.gitgitgadget@gmail.com>
Date: Fri, 26 Mar 2021 19:12:05 +0000
Subject: [PATCH 1/3] csum-file: add nested_hashfile()
Fcc: Sent
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
MIME-Version: 1.0
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, stolee@gmail.com,
        git@jeffhostetler.com, Derrick Stolee <derrickstolee@github.com>,
        Derrick Stolee <dstolee@microsoft.com>
Precedence: bulk
From: Derrick Stolee <dstolee@microsoft.com>

Series

Convert index writes to use hashfile API | expand

Commit Message

Derrick Stolee March 26, 2021, 7:12 p.m. UTC

From: Derrick Stolee <dstolee@microsoft.com>

The index writing code in do_write_index() uses a custom set of hashing
code, in part because it was introduced before the hashfile API. But
also, the End of Index Entries extension computes a hash of just the
extension data, not the entire file preceding that extension.

Before converting the index writing code to use the hashfile API, create
a concept of a "nested hashfile". By adding a 'base' member to 'struct
hashfile', we indicate that any writes to this hashfile should be passed
along to the base hashfile, too.

In the next change, the index code will use this to create a new
hashfile wose base is the hashfile for the index. The outer hashfile
will compute the hash just for the extension details. Thus, it will
finalize earlier than the base hashfile, hence there is no modification
to finalize_hashfile() here.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 csum-file.c | 22 ++++++++++++++++++++++
 csum-file.h |  9 +++++++++
 2 files changed, 31 insertions(+)

diff --git a/csum-file.c b/csum-file.c
index 0f35fa5ee47c..e73b35316e66 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -13,6 +13,9 @@ 
 
 static void flush(struct hashfile *f, const void *buf, unsigned int count)
 {
+	if (f->base)
+		return;
+
 	if (0 <= f->check_fd && count)  {
 		unsigned char check_buffer[8192];
 		ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
@@ -116,6 +119,9 @@  void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 		}
 		f->offset = offset;
 	}
+
+	if (f->base)
+		hashwrite(f->base, buf, count);
 }
 
 struct hashfile *hashfd(int fd, const char *name)
@@ -150,6 +156,7 @@  struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
 	f->name = name;
 	f->do_crc = 0;
 	the_hash_algo->init_fn(&f->ctx);
+	f->base = NULL;
 	return f;
 }
 
@@ -184,3 +191,18 @@  uint32_t crc32_end(struct hashfile *f)
 	f->do_crc = 0;
 	return f->crc32;
 }
+
+struct hashfile *nested_hashfile(struct hashfile *f)
+{
+	struct hashfile *n = xmalloc(sizeof(*f));
+	n->fd = -1;
+	n->check_fd = -1;
+	n->offset = 0;
+	n->total = 0;
+	n->tp = NULL;
+	n->name = NULL;
+	n->do_crc = 0;
+	the_hash_algo->init_fn(&n->ctx);
+	n->base = f;
+	return n;
+}
diff --git a/csum-file.h b/csum-file.h
index e54d53d1d0b3..b8785e7ecb46 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -16,6 +16,7 @@  struct hashfile {
 	const char *name;
 	int do_crc;
 	uint32_t crc32;
+	struct hashfile *base;
 	unsigned char buffer[8192];
 };
 
@@ -42,6 +43,14 @@  void hashflush(struct hashfile *f);
 void crc32_begin(struct hashfile *);
 uint32_t crc32_end(struct hashfile *);
 
+/*
+ * A nested hashfile uses the same interface as a hashfile, and computes
+ * a hash for the input bytes while passing them to the base hashfile
+ * instead of writing them to its own file. This is useful for computing
+ * a hash of a region within a file during the write.
+ */
+struct hashfile *nested_hashfile(struct hashfile *f);
+
 /*
  * Returns the total number of bytes fed to the hashfile so far (including ones
  * that have not been written out to the descriptor yet).

[1/3] csum-file: add nested_hashfile()

Commit Message

Patch