diff mbox series

[12/15] chunk-format: create write_chunks()

Message ID 03f3255c8f4a953065b2ff8e61816f83534c23ed.1607012215.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Refactor chunk-format into an API | expand

Commit Message

Derrick Stolee Dec. 3, 2020, 4:16 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The commit-graph and multi-pack-index files both use a chunk-based file
format. They have already unified on using write_table_of_contents(),
but we expand upon that by unifying their chunk writing loop.

This takes the concepts already present in the commit-graph that were
dropped in the multi-pack-index code during refactoring, including:

* Check the hashfile for how much data was written by each write_fn.

* Allow write_fn() to report an error that results in a failure
  without using die() in the low-level commands.

This simplifies the code in commit-graph.c and midx.c while laying the
foundation for future formats using similar ideas.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 chunk-format.c | 23 +++++++++++++++++++++++
 chunk-format.h | 13 +++++++++++++
 commit-graph.c | 13 ++-----------
 midx.c         |  3 +--
 4 files changed, 39 insertions(+), 13 deletions(-)

Comments

Taylor Blau Dec. 8, 2020, 6:45 p.m. UTC | #1
On Thu, Dec 03, 2020 at 04:16:51PM +0000, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The commit-graph and multi-pack-index files both use a chunk-based file
> format. They have already unified on using write_table_of_contents(),
> but we expand upon that by unifying their chunk writing loop.
>
> This takes the concepts already present in the commit-graph that were
> dropped in the multi-pack-index code during refactoring, including:
>
> * Check the hashfile for how much data was written by each write_fn.
>
> * Allow write_fn() to report an error that results in a failure
>   without using die() in the low-level commands.
>
> This simplifies the code in commit-graph.c and midx.c while laying the
> foundation for future formats using similar ideas.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  chunk-format.c | 23 +++++++++++++++++++++++
>  chunk-format.h | 13 +++++++++++++
>  commit-graph.c | 13 ++-----------
>  midx.c         |  3 +--
>  4 files changed, 39 insertions(+), 13 deletions(-)
>
> diff --git a/chunk-format.c b/chunk-format.c
> index 771b6d98d0..a6643a4fc8 100644
> --- a/chunk-format.c
> +++ b/chunk-format.c
> @@ -24,3 +24,26 @@ void write_table_of_contents(struct hashfile *f,
>  	hashwrite_be32(f, 0);
>  	hashwrite_be64(f, cur_offset);
>  }
> +
> +int write_chunks(struct hashfile *f,
> +		 struct chunk_info *chunks,
> +		 int nr,
> +		 void *data)
> +{

Serves me right for thinking that a function like write_chunks() would
be a good addition to this series without... actually reading the whole
series first ;-).

I'm glad to see that you're adding such a function, but I think that I
prefer my version which moves these four parameters into the chunkfile
struct. That allows for other functions to be created which can, for
e.g., manage the chunks themselves (like my chunkfile_push_chunk()).

Thanks,
Taylor
diff mbox series

Patch

diff --git a/chunk-format.c b/chunk-format.c
index 771b6d98d0..a6643a4fc8 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -24,3 +24,26 @@  void write_table_of_contents(struct hashfile *f,
 	hashwrite_be32(f, 0);
 	hashwrite_be64(f, cur_offset);
 }
+
+int write_chunks(struct hashfile *f,
+		 struct chunk_info *chunks,
+		 int nr,
+		 void *data)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		uint64_t start_offset = f->total + f->offset;
+		int result = chunks[i].write_fn(f, data);
+
+		if (result)
+			return result;
+
+		if (f->total + f->offset != start_offset + chunks[i].size)
+			BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
+			    chunks[i].size, chunks[i].id,
+			    f->total + f->offset - start_offset);
+	}
+
+	return 0;
+}
diff --git a/chunk-format.h b/chunk-format.h
index 4b9cbeb372..a2c7ddb23b 100644
--- a/chunk-format.h
+++ b/chunk-format.h
@@ -33,4 +33,17 @@  void write_table_of_contents(struct hashfile *f,
 			     struct chunk_info *chunks,
 			     int nr);
 
+/*
+ * Write the data for the given chunk list using the provided
+ * write_fn values. The given 'data' parameter is passed to those
+ * methods.
+ *
+ * The data that is written by each write_fn is checked to be of
+ * the expected size, and a BUG() is thrown if not specified correctly.
+ */
+int write_chunks(struct hashfile *f,
+		 struct chunk_info *chunks,
+		 int nr,
+		 void *data);
+
 #endif
diff --git a/commit-graph.c b/commit-graph.c
index 5494fda1d3..10dcef9d6b 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1809,17 +1809,8 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 			num_chunks * ctx->commits.nr);
 	}
 
-	for (i = 0; i < num_chunks; i++) {
-		uint64_t start_offset = f->total + f->offset;
-
-		if (chunks[i].write_fn(f, ctx))
-			return -1;
-
-		if (f->total + f->offset != start_offset + chunks[i].size)
-			BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
-			    chunks[i].size, chunks[i].id,
-			    f->total + f->offset - start_offset);
-	}
+	if (write_chunks(f, chunks, num_chunks, ctx))
+		return -1;
 
 	stop_progress(&ctx->progress);
 	strbuf_release(&progress_title);
diff --git a/midx.c b/midx.c
index 47f5f60fcd..67ac232a81 100644
--- a/midx.c
+++ b/midx.c
@@ -957,8 +957,7 @@  static int write_midx_internal(const char *object_dir, struct multi_pack_index *
 
 	write_table_of_contents(f, header_size, chunks, num_chunks);
 
-	for (i = 0; i < num_chunks; i++)
-		chunks[i].write_fn(f, &ctx);
+	result = write_chunks(f, chunks, num_chunks, &ctx);
 
 	finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
 	commit_lock_file(&lk);