diff mbox series

[14/15] commit-graph: restore duplicate chunk checks

Message ID 106dd51f75699fbf4fc1e46687124995f5ef0278.1607012215.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Refactor chunk-format into an API | expand

Commit Message

Derrick Stolee Dec. 3, 2020, 4:16 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The previous change introduced read_table_of_contents() in the
chunk-format API, but dropped the duplicate chunk check from the
commit-graph parsing logic. This was done to keep flexibility in the
chunk-format API.

One way to restore this check is to have each chunk_read_fn method check
if it has run before. This is somewhat repetitive. If we determine that
the chunk-format API would be better off with a hard requirement that
chunks are never repeated, then this could be replaced with a check in
chunk-format.c.

For now, only restore the duplicate checks that previously existed in
the commit-graph parsing logic.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-graph.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

Comments

Derrick Stolee Dec. 7, 2020, 1:43 p.m. UTC | #1
On 12/3/2020 11:16 AM, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
> 
> The previous change introduced read_table_of_contents() in the
> chunk-format API, but dropped the duplicate chunk check from the
> commit-graph parsing logic. This was done to keep flexibility in the
> chunk-format API.

"keep flexibility" is bogus. This is the biggest YAGNI of this
series. Instead, consider the patch below instead which restores
duplicate checks for the commit-graph file AND adds them to the
multi-pack-index file due to the shared API.

This is also roughly half of the added lines from the previous
patch.

Thanks,
-Stolee

-- >8 --

From 0df4959d59d7f9df3e9f6326bb0acb7b84f84980 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <dstolee@microsoft.com>
Date: Mon, 7 Dec 2020 08:36:42 -0500
Subject: [PATCH] chunk-format: restore duplicate chunk checks

Before refactoring into the chunk-format API, the commit-graph parsing
logic included checks for duplicate chunks. It is unlikely that we would
desire a chunk-based file format that allows duplicate chunk IDs in the
table of contents, so add duplicate checks into
read_table_of_contents().

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 chunk-format.c | 15 ++++++++++++++-
 chunk-format.h |  3 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/chunk-format.c b/chunk-format.c
index d888ef6ec73..a4891bbd28a 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -57,9 +57,13 @@ int read_table_of_contents(const unsigned char *mfile,
 			   int nr,
 			   void *data)
 {
+	int i;
 	uint32_t chunk_id;
 	const unsigned char *table_of_contents = mfile + toc_offset;
 
+	for (i = 0; i < nr; i++)
+		chunks[i].found = 0;
+
 	while (toc_length--) {
 		int i;
 		uint64_t chunk_offset, next_chunk_offset;
@@ -83,7 +87,16 @@ int read_table_of_contents(const unsigned char *mfile,
 		}
 		for (i = 0; i < nr; i++) {
 			if (chunks[i].id == chunk_id) {
-				int result = chunks[i].read_fn(
+				int result;
+
+				if (chunks[i].found) {
+					error(_("duplicate chunk ID %"PRIx32" found"),
+					      chunk_id);
+					return 1;
+				}
+
+				chunks[i].found = 1;
+				result = chunks[i].read_fn(
 						mfile + chunk_offset,
 						next_chunk_offset - chunk_offset,
 						data);
diff --git a/chunk-format.h b/chunk-format.h
index 7049800f734..de45797223a 100644
--- a/chunk-format.h
+++ b/chunk-format.h
@@ -56,6 +56,9 @@ typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
 struct read_chunk_info {
 	uint32_t id;
 	chunk_read_fn read_fn;
+
+	/* used internally */
+	unsigned found:1;
 };
 
 int read_table_of_contents(const unsigned char *mfile,
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index 0a3ba147df..c0102fceba 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -289,10 +289,20 @@  static int verify_commit_graph_lite(struct commit_graph *g)
 	return 0;
 }
 
+static int report_duplicate(void)
+{
+	warning(_("duplicate chunk detected"));
+	return 1;
+}
+
 static int graph_read_oid_fanout(const unsigned char *chunk_start,
 				 size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_oid_fanout)
+		return report_duplicate();
+
 	g->chunk_oid_fanout = (uint32_t*)chunk_start;
 	return 0;
 }
@@ -301,6 +311,10 @@  static int graph_read_oid_lookup(const unsigned char *chunk_start,
 				 size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_oid_lookup)
+		return report_duplicate();
+
 	g->chunk_oid_lookup = chunk_start;
 	g->num_commits = chunk_size / g->hash_len;
 	return 0;
@@ -310,6 +324,10 @@  static int graph_read_data(const unsigned char *chunk_start,
 				 size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_commit_data)
+		return report_duplicate();
+
 	g->chunk_commit_data = chunk_start;
 	return 0;
 }
@@ -318,6 +336,10 @@  static int graph_read_extra_edges(const unsigned char *chunk_start,
 				  size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_extra_edges)
+		return report_duplicate();
+
 	g->chunk_extra_edges = chunk_start;
 	return 0;
 }
@@ -326,6 +348,10 @@  static int graph_read_base_graphs(const unsigned char *chunk_start,
 				  size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_base_graphs)
+		return report_duplicate();
+
 	g->chunk_base_graphs = chunk_start;
 	return 0;
 }
@@ -334,6 +360,10 @@  static int graph_read_bloom_indices(const unsigned char *chunk_start,
 				    size_t chunk_size, void *data)
 {
 	struct commit_graph *g = (struct commit_graph *)data;
+
+	if (g->chunk_bloom_indexes)
+		return report_duplicate();
+
 	g->chunk_bloom_indexes = chunk_start;
 	return 0;
 }
@@ -343,6 +373,10 @@  static int graph_read_bloom_data(const unsigned char *chunk_start,
 {
 	struct commit_graph *g = (struct commit_graph *)data;
 	uint32_t hash_version;
+
+	if (g->chunk_bloom_data)
+		return report_duplicate();
+
 	g->chunk_bloom_data = chunk_start;
 	hash_version = get_be32(chunk_start);