Message ID | 8b959f2f374654aeb87b847560761890c2f9aa2c.1592934430.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | More commit-graph/Bloom filter improvements | expand |
Am 23.06.20 um 19:47 schrieb SZEDER Gábor via GitGitGadget: > From: SZEDER Gábor <szeder.dev@gmail.com> > > In write_commit_graph_file() we now have one block of code filling the > array of 'struct chunk_info' with the IDs and sizes of chunks to be > written, and another block of code calling the functions responsible > for writing individual chunks. In case of optional chunks like Extra > Edge List and Base Graphs List there is also a condition checking > whether that chunk is necessary/desired, and that same condition is > repeated in both blocks of code. Other, newer chunks have similar > optional conditions. > > Eliminate these repeated conditions by storing the function pointers > responsible for writing individual chunks in the 'struct chunk_info' > array as well, and calling them in a loop to write the commit-graph > file. This will open up the possibility for a bit of foolproofing in > the following patch. You can do that without storing function pointers by selecting the function to use based on the chunk ID -- like parse_commit_graph() does on the read side. Advantage: You don't need to press all write functions into the same mold and can keep their individual signatures. 
> > Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> > Signed-off-by: Derrick Stolee <dstolee@microsoft.com> > --- > commit-graph.c | 31 +++++++++++++++++++------------ > 1 file changed, 19 insertions(+), 12 deletions(-) > > diff --git a/commit-graph.c b/commit-graph.c > index f33bfe49b3..086fc2d070 100644 > --- a/commit-graph.c > +++ b/commit-graph.c > @@ -1555,9 +1555,13 @@ static int write_graph_chunk_base(struct hashfile *f, > return 0; > } > > +typedef int (*chunk_write_fn)(struct hashfile *f, > + struct write_commit_graph_context *ctx); > + > struct chunk_info { > uint32_t id; > uint64_t size; > + chunk_write_fn write_fn; > }; > > static int write_commit_graph_file(struct write_commit_graph_context *ctx) > @@ -1615,27 +1619,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) > > chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; > chunks[0].size = GRAPH_FANOUT_SIZE; > + chunks[0].write_fn = write_graph_chunk_fanout; > chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; > chunks[1].size = hashsz * ctx->commits.nr; > + chunks[1].write_fn = write_graph_chunk_oids; > chunks[2].id = GRAPH_CHUNKID_DATA; > chunks[2].size = (hashsz + 16) * ctx->commits.nr; > + chunks[2].write_fn = write_graph_chunk_data; > if (ctx->num_extra_edges) { > chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; > chunks[num_chunks].size = 4 * ctx->num_extra_edges; > + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; > num_chunks++; > } > if (ctx->changed_paths) { > chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; > chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; > + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; > num_chunks++; > chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; > chunks[num_chunks].size = sizeof(uint32_t) * 3 > + ctx->total_bloom_filter_data_size; > + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; > num_chunks++; > } > if (ctx->num_commit_graphs_after > 1) { > chunks[num_chunks].id = GRAPH_CHUNKID_BASE; > 
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); > + chunks[num_chunks].write_fn = write_graph_chunk_base; > num_chunks++; > } > > @@ -1671,19 +1682,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) > progress_title.buf, > num_chunks * ctx->commits.nr); > } > - write_graph_chunk_fanout(f, ctx); > - write_graph_chunk_oids(f, ctx); > - write_graph_chunk_data(f, ctx); > - if (ctx->num_extra_edges) > - write_graph_chunk_extra_edges(f, ctx); > - if (ctx->changed_paths) { > - write_graph_chunk_bloom_indexes(f, ctx); > - write_graph_chunk_bloom_data(f, ctx); > - } > - if (ctx->num_commit_graphs_after > 1 && > - write_graph_chunk_base(f, ctx)) { > - return -1; > + > + for (i = 0; i < num_chunks; i++) { > + if (chunks[i].write_fn(f, ctx)) { > + error(_("failed writing chunk with id %"PRIx32""), > + chunks[i].id); Of all the write functions only write_graph_chunk_base() can return non-zero and it already prints an error message in that case ("failed to write correct number of base graph ids"). Why add this one? > + return -1; > + } > } > + > stop_progress(&ctx->progress); > strbuf_release(&progress_title); > >
On 6/25/2020 3:25 AM, René Scharfe wrote: > Am 23.06.20 um 19:47 schrieb SZEDER Gábor via GitGitGadget: >> From: SZEDER Gábor <szeder.dev@gmail.com> >> >> In write_commit_graph_file() we now have one block of code filling the >> array of 'struct chunk_info' with the IDs and sizes of chunks to be >> written, and another block of code calling the functions responsible >> for writing individual chunks. In case of optional chunks like Extra >> Edge List and Base Graphs List there is also a condition checking >> whether that chunk is necessary/desired, and that same condition is >> repeated in both blocks of code. Other, newer chunks have similar >> optional conditions. >> >> Eliminate these repeated conditions by storing the function pointers >> responsible for writing individual chunks in the 'struct chunk_info' >> array as well, and calling them in a loop to write the commit-graph >> file. This will open up the possibility for a bit of foolproofing in >> the following patch. > > You can do that without storing function pointers by selecting the > function to use based on the chunk ID -- like parse_commit_graph() does > on the read side. Advantage: You don't need to press all write > functions into the same mold and can keep their individual signatures. I do think that the loop without a switch statement is valuable. It keeps the updates for new chunks localized to the section that calculates the offset values. 
>> >> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> >> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> >> --- >> commit-graph.c | 31 +++++++++++++++++++------------ >> 1 file changed, 19 insertions(+), 12 deletions(-) >> >> diff --git a/commit-graph.c b/commit-graph.c >> index f33bfe49b3..086fc2d070 100644 >> --- a/commit-graph.c >> +++ b/commit-graph.c >> @@ -1555,9 +1555,13 @@ static int write_graph_chunk_base(struct hashfile *f, >> return 0; >> } >> >> +typedef int (*chunk_write_fn)(struct hashfile *f, >> + struct write_commit_graph_context *ctx); >> + >> struct chunk_info { >> uint32_t id; >> uint64_t size; >> + chunk_write_fn write_fn; >> }; >> >> static int write_commit_graph_file(struct write_commit_graph_context *ctx) >> @@ -1615,27 +1619,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) >> >> chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; >> chunks[0].size = GRAPH_FANOUT_SIZE; >> + chunks[0].write_fn = write_graph_chunk_fanout; >> chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; >> chunks[1].size = hashsz * ctx->commits.nr; >> + chunks[1].write_fn = write_graph_chunk_oids; >> chunks[2].id = GRAPH_CHUNKID_DATA; >> chunks[2].size = (hashsz + 16) * ctx->commits.nr; >> + chunks[2].write_fn = write_graph_chunk_data; >> if (ctx->num_extra_edges) { >> chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; >> chunks[num_chunks].size = 4 * ctx->num_extra_edges; >> + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; >> num_chunks++; >> } >> if (ctx->changed_paths) { >> chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; >> chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; >> + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; >> num_chunks++; >> chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; >> chunks[num_chunks].size = sizeof(uint32_t) * 3 >> + ctx->total_bloom_filter_data_size; >> + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; >> num_chunks++; >> } >> if (ctx->num_commit_graphs_after > 1) { 
>> chunks[num_chunks].id = GRAPH_CHUNKID_BASE; >> chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); >> + chunks[num_chunks].write_fn = write_graph_chunk_base; >> num_chunks++; >> } >> >> @@ -1671,19 +1682,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) >> progress_title.buf, >> num_chunks * ctx->commits.nr); >> } >> - write_graph_chunk_fanout(f, ctx); >> - write_graph_chunk_oids(f, ctx); >> - write_graph_chunk_data(f, ctx); >> - if (ctx->num_extra_edges) >> - write_graph_chunk_extra_edges(f, ctx); >> - if (ctx->changed_paths) { >> - write_graph_chunk_bloom_indexes(f, ctx); >> - write_graph_chunk_bloom_data(f, ctx); >> - } >> - if (ctx->num_commit_graphs_after > 1 && >> - write_graph_chunk_base(f, ctx)) { >> - return -1; >> + >> + for (i = 0; i < num_chunks; i++) { >> + if (chunks[i].write_fn(f, ctx)) { >> + error(_("failed writing chunk with id %"PRIx32""), >> + chunks[i].id); > > Of all the write functions only write_graph_chunk_base() can return > non-zero and it already prints an error message in that case ("failed to > write correct number of base graph ids"). Why add this one? Ok, we can require the chunk methods to write an error() message with appropriate context and simply return -1 here. Thanks, -Stolee
diff --git a/commit-graph.c b/commit-graph.c index f33bfe49b3..086fc2d070 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1555,9 +1555,13 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +typedef int (*chunk_write_fn)(struct hashfile *f, + struct write_commit_graph_context *ctx); + struct chunk_info { uint32_t id; uint64_t size; + chunk_write_fn write_fn; }; static int write_commit_graph_file(struct write_commit_graph_context *ctx) @@ -1615,27 +1619,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[0].write_fn = write_graph_chunk_fanout; chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; chunks[1].size = hashsz * ctx->commits.nr; + chunks[1].write_fn = write_graph_chunk_oids; chunks[2].id = GRAPH_CHUNKID_DATA; chunks[2].size = (hashsz + 16) * ctx->commits.nr; + chunks[2].write_fn = write_graph_chunk_data; if (ctx->num_extra_edges) { chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].size = 4 * ctx->num_extra_edges; + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; num_chunks++; } if (ctx->changed_paths) { chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; num_chunks++; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; chunks[num_chunks].size = sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { chunks[num_chunks].id = GRAPH_CHUNKID_BASE; chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); + chunks[num_chunks].write_fn = write_graph_chunk_base; num_chunks++; } @@ -1671,19 +1682,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) progress_title.buf, num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, 
ctx); - write_graph_chunk_oids(f, ctx); - write_graph_chunk_data(f, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - if (ctx->changed_paths) { - write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx); - } - if (ctx->num_commit_graphs_after > 1 && - write_graph_chunk_base(f, ctx)) { - return -1; + + for (i = 0; i < num_chunks; i++) { + if (chunks[i].write_fn(f, ctx)) { + error(_("failed writing chunk with id %"PRIx32""), + chunks[i].id); + return -1; + } } + stop_progress(&ctx->progress); strbuf_release(&progress_title);