Message ID | 6be759a9542114e4de41422efa18491085e19682.1597509583.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Implement Corrected Commit Date | expand |
"Abhishek Kumar via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Abhishek Kumar <abhishekkumar8222@gmail.com> > > In a preparatory step, let's return timestamp_t values from > commit_graph_generation(), use timestamp_t for local variables All right, this is all good. > and define GENERATION_NUMBER_INFINITY as (2 ^ 63 - 1) instead. This needs more detailed examination. There are two similar constants, GENERATION_NUMBER_INFINITY and GENERATION_NUMBER_MAX. The former is used for newest commits outside the commit-graph, while the latter is maximum number that commits in the commit-graph can have (because of the storage limitations). We therefore need GENERATION_NUMBER_INFINITY to be larger than GENERATION_NUMBER_MAX, and it is (and was). The GENERATION_NUMBER_INFINITY is because of the above requirement traditionally taken as maximum value that can be represented in the data type used to store commit's generation number _in memory_, but it can be less. For timestamp_t the maximum value that can be represented is (2 ^ 63 - 1). All right then. > The commit message says nothing about the new symbolic constant GENERATION_NUMBER_V1_INFINITY, though. I'm not sure it is even needed (see comments below). > Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com> > --- > commit-graph.c | 18 +++++++++--------- > commit-graph.h | 4 ++-- > commit-reach.c | 32 ++++++++++++++++---------------- > commit-reach.h | 2 +- > commit.h | 3 ++- > revision.c | 10 +++++----- > upload-pack.c | 2 +- > 7 files changed, 36 insertions(+), 35 deletions(-) I hope that changing the type returned by commit_graph_generation() and stored in 'generation' field of `struct commit_graph_data` would mean that the compiler or at least the linter would catch all the places that need updating the type. Just in case, I have performed a simple code search and it agrees with the above list (one search result missing, in commit.c, was handled by previous patch). > > diff --git a/commit-graph.c b/commit-graph.c > index fb6e2bf18f..7f9f858577 100644 > --- a/commit-graph.c > +++ b/commit-graph.c > @@ -99,7 +99,7 @@ uint32_t commit_graph_position(const struct commit *c) > return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH; > } > > -uint32_t commit_graph_generation(const struct commit *c) > +timestamp_t commit_graph_generation(const struct commit *c) > { > struct commit_graph_data *data = > commit_graph_data_slab_peek(&commit_graph_data_slab, c); > @@ -115,8 +115,8 @@ uint32_t commit_graph_generation(const struct commit *c) > int compare_commits_by_gen(const void *_a, const void *_b) > { > const struct commit *a = _a, *b = _b; > - const uint32_t generation_a = commit_graph_generation(a); > - const uint32_t generation_b = commit_graph_generation(b); > + const timestamp_t generation_a = commit_graph_generation(a); > + const timestamp_t generation_b = commit_graph_generation(b); > > /* older commits first */ > if (generation_a < generation_b) > @@ -159,8 +159,8 @@ static int commit_gen_cmp(const void *va, const void *vb) > const struct commit *a = *(const struct commit **)va; > const struct commit *b = *(const struct commit **)vb; > > - uint32_t generation_a = commit_graph_data_at(a)->generation; > - uint32_t generation_b = commit_graph_data_at(b)->generation; > + const timestamp_t generation_a = commit_graph_data_at(a)->generation; > + const timestamp_t generation_b = commit_graph_data_at(b)->generation; > /* lower generation commits first */ > if (generation_a < generation_b) > return -1; All right. > @@ -1338,7 +1338,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) > uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; Shouldn't this be - uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; + timestamp_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; > > display_progress(ctx->progress, i + 1); > - if (generation != GENERATION_NUMBER_INFINITY && > + if (generation != GENERATION_NUMBER_V1_INFINITY && Then there would be no need for this change, isn't it? > generation != GENERATION_NUMBER_ZERO) > continue; > > @@ -1352,7 +1352,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) > for (parent = current->parents; parent; parent = parent->next) { > generation = commit_graph_data_at(parent->item)->generation; > > - if (generation == GENERATION_NUMBER_INFINITY || > + if (generation == GENERATION_NUMBER_V1_INFINITY || And this one either. > generation == GENERATION_NUMBER_ZERO) { > all_parents_computed = 0; > commit_list_insert(parent->item, &list); > @@ -2355,8 +2355,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) > for (i = 0; i < g->num_commits; i++) { > struct commit *graph_commit, *odb_commit; > struct commit_list *graph_parents, *odb_parents; > - uint32_t max_generation = 0; > - uint32_t generation; > + timestamp_t max_generation = 0; > + timestamp_t generation; All right. > > display_progress(progress, i + 1); > hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); > diff --git a/commit-graph.h b/commit-graph.h > index 701e3d41aa..430bc830bb 100644 > --- a/commit-graph.h > +++ b/commit-graph.h > @@ -138,13 +138,13 @@ void disable_commit_graph(struct repository *r); > > struct commit_graph_data { > uint32_t graph_pos; > - uint32_t generation; > + timestamp_t generation; > }; All right; this is the main part of this change. > > /* > * Commits should be parsed before accessing generation, graph positions. > */ > -uint32_t commit_graph_generation(const struct commit *); > +timestamp_t commit_graph_generation(const struct commit *); > uint32_t commit_graph_position(const struct commit *); As is this one. > > int compare_commits_by_gen(const void *_a, const void *_b); > diff --git a/commit-reach.c b/commit-reach.c > index c83cc291e7..470bc80139 100644 > --- a/commit-reach.c > +++ b/commit-reach.c > @@ -32,12 +32,12 @@ static int queue_has_nonstale(struct prio_queue *queue) > static struct commit_list *paint_down_to_common(struct repository *r, > struct commit *one, int n, > struct commit **twos, > - int min_generation) > + timestamp_t min_generation) > { > struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; > struct commit_list *result = NULL; > int i; > - uint32_t last_gen = GENERATION_NUMBER_INFINITY; > + timestamp_t last_gen = GENERATION_NUMBER_INFINITY; > > if (!min_generation) > queue.compare = compare_commits_by_commit_date; All right. > @@ -58,10 +58,10 @@ static struct commit_list *paint_down_to_common(struct repository *r, > struct commit *commit = prio_queue_get(&queue); > struct commit_list *parents; > int flags; > - uint32_t generation = commit_graph_generation(commit); > + timestamp_t generation = commit_graph_generation(commit); > > if (min_generation && generation > last_gen) > - BUG("bad generation skip %8x > %8x at %s", > + BUG("bad generation skip %"PRItime" > %"PRItime" at %s", > generation, last_gen, > oid_to_hex(&commit->object.oid)); > last_gen = generation; All right. > @@ -177,12 +177,12 @@ static int remove_redundant(struct repository *r, struct commit **array, int cnt > repo_parse_commit(r, array[i]); > for (i = 0; i < cnt; i++) { > struct commit_list *common; > - uint32_t min_generation = commit_graph_generation(array[i]); > + timestamp_t min_generation = commit_graph_generation(array[i]); > > if (redundant[i]) > continue; > for (j = filled = 0; j < cnt; j++) { > - uint32_t curr_generation; > + timestamp_t curr_generation; > if (i == j || redundant[j]) > continue; > filled_index[filled] = j; All right. > @@ -321,7 +321,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, > { > struct commit_list *bases; > int ret = 0, i; > - uint32_t generation, min_generation = GENERATION_NUMBER_INFINITY; > + timestamp_t generation, min_generation = GENERATION_NUMBER_INFINITY; > > if (repo_parse_commit(r, commit)) > return ret; All right, > @@ -470,7 +470,7 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) > static enum contains_result contains_test(struct commit *candidate, > const struct commit_list *want, > struct contains_cache *cache, > - uint32_t cutoff) > + timestamp_t cutoff) All right. (Sidenote: this one I have missed in my simple search.) > { > enum contains_result *cached = contains_cache_at(cache, candidate); > > @@ -506,11 +506,11 @@ static enum contains_result contains_tag_algo(struct commit *candidate, > { > struct contains_stack contains_stack = { 0, 0, NULL }; > enum contains_result result; > - uint32_t cutoff = GENERATION_NUMBER_INFINITY; > + timestamp_t cutoff = GENERATION_NUMBER_INFINITY; > const struct commit_list *p; > > for (p = want; p; p = p->next) { > - uint32_t generation; > + timestamp_t generation; > struct commit *c = p->item; > load_commit_graph_info(the_repository, c); > generation = commit_graph_generation(c); All right. > @@ -565,7 +565,7 @@ int can_all_from_reach_with_flag(struct object_array *from, > unsigned int with_flag, > unsigned int assign_flag, > time_t min_commit_date, > - uint32_t min_generation) > + timestamp_t min_generation) > { > struct commit **list = NULL; > int i; > @@ -666,13 +666,13 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, > time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; > struct commit_list *from_iter = from, *to_iter = to; > int result; > - uint32_t min_generation = GENERATION_NUMBER_INFINITY; > + timestamp_t min_generation = GENERATION_NUMBER_INFINITY; > > while (from_iter) { > add_object_array(&from_iter->item->object, NULL, &from_objs); > > if (!parse_commit(from_iter->item)) { > - uint32_t generation; > + timestamp_t generation; > if (from_iter->item->date < min_commit_date) > min_commit_date = from_iter->item->date; > > @@ -686,7 +686,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, > > while (to_iter) { > if (!parse_commit(to_iter->item)) { > - uint32_t generation; > + timestamp_t generation; > if (to_iter->item->date < min_commit_date) > min_commit_date = to_iter->item->date; > All right. > @@ -726,13 +726,13 @@ struct commit_list *get_reachable_subset(struct commit **from, int nr_from, > struct commit_list *found_commits = NULL; > struct commit **to_last = to + nr_to; > struct commit **from_last = from + nr_from; > - uint32_t min_generation = GENERATION_NUMBER_INFINITY; > + timestamp_t min_generation = GENERATION_NUMBER_INFINITY; > int num_to_find = 0; > > struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; > > for (item = to; item < to_last; item++) { > - uint32_t generation; > + timestamp_t generation; > struct commit *c = *item; > > parse_commit(c); All right. > diff --git a/commit-reach.h b/commit-reach.h > index b49ad71a31..148b56fea5 100644 > --- a/commit-reach.h > +++ b/commit-reach.h > @@ -87,7 +87,7 @@ int can_all_from_reach_with_flag(struct object_array *from, > unsigned int with_flag, > unsigned int assign_flag, > time_t min_commit_date, > - uint32_t min_generation); > + timestamp_t min_generation); > int can_all_from_reach(struct commit_list *from, struct commit_list *to, > int commit_date_cutoff); > All right. > diff --git a/commit.h b/commit.h > index e901538909..bc0732a4fe 100644 > --- a/commit.h > +++ b/commit.h > @@ -11,7 +11,8 @@ > #include "commit-slab.h" > > #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF > -#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF > +#define GENERATION_NUMBER_INFINITY ((1ULL << 63) - 1) > +#define GENERATION_NUMBER_V1_INFINITY 0xFFFFFFFF > #define GENERATION_NUMBER_MAX 0x3FFFFFFF > #define GENERATION_NUMBER_ZERO 0 > Why do we even need GENERATION_NUMBER_V1_INFINITY? It is about marking out-of-graph commits, and it is about in-memory storage. We would need separate GENERATION_NUMBER_V1_MAX and GENERATION_NUMBER_V2_MAX because of different _on-disk_ storage, or in other words file format limitations. But that is for the future commit. > diff --git a/revision.c b/revision.c > index ecf757c327..411852468b 100644 > --- a/revision.c > +++ b/revision.c > @@ -3290,7 +3290,7 @@ define_commit_slab(indegree_slab, int); > define_commit_slab(author_date_slab, timestamp_t); > > struct topo_walk_info { > - uint32_t min_generation; > + timestamp_t min_generation; > struct prio_queue explore_queue; > struct prio_queue indegree_queue; > struct prio_queue topo_queue; All right. > @@ -3336,7 +3336,7 @@ static void explore_walk_step(struct rev_info *revs) > } > > static void explore_to_depth(struct rev_info *revs, > - uint32_t gen_cutoff) > + timestamp_t gen_cutoff) > { > struct topo_walk_info *info = revs->topo_walk_info; > struct commit *c; All right. > @@ -3379,7 +3379,7 @@ static void indegree_walk_step(struct rev_info *revs) > } > > static void compute_indegrees_to_depth(struct rev_info *revs, > - uint32_t gen_cutoff) > + timestamp_t gen_cutoff) > { > struct topo_walk_info *info = revs->topo_walk_info; > struct commit *c; > @@ -3437,7 +3437,7 @@ static void init_topo_walk(struct rev_info *revs) > info->min_generation = GENERATION_NUMBER_INFINITY; > for (list = revs->commits; list; list = list->next) { > struct commit *c = list->item; > - uint32_t generation; > + timestamp_t generation; > > if (parse_commit_gently(c, 1)) > continue; All right. > @@ -3498,7 +3498,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit) > for (p = commit->parents; p; p = p->next) { > struct commit *parent = p->item; > int *pi; > - uint32_t generation; > + timestamp_t generation; > > if (parent->object.flags & UNINTERESTING) > continue; All right. > diff --git a/upload-pack.c b/upload-pack.c > index 80ad9a38d8..bcb8b5dfda 100644 > --- a/upload-pack.c > +++ b/upload-pack.c > @@ -497,7 +497,7 @@ static int got_oid(struct upload_pack_data *data, > > static int ok_to_give_up(struct upload_pack_data *data) > { > - uint32_t min_generation = GENERATION_NUMBER_ZERO; > + timestamp_t min_generation = GENERATION_NUMBER_ZERO; > > if (!data->have_obj.nr) > return 0; All right. Best,
On Fri, Aug 21, 2020 at 03:14:34PM +0200, Jakub Narębski wrote: > "Abhishek Kumar via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > From: Abhishek Kumar <abhishekkumar8222@gmail.com> > > > > In a preparatory step, let's return timestamp_t values from > > commit_graph_generation(), use timestamp_t for local variables > > All right, this is all good. > > > and define GENERATION_NUMBER_INFINITY as (2 ^ 63 - 1) instead. > > This needs more detailed examination. There are two similar constants, > GENERATION_NUMBER_INFINITY and GENERATION_NUMBER_MAX. The former is > used for newest commits outside the commit-graph, while the latter is > maximum number that commits in the commit-graph can have (because of the > storage limitations). We therefore need GENERATION_NUMBER_INFINITY > to be larger than GENERATION_NUMBER_MAX, and it is (and was). > > The GENERATION_NUMBER_INFINITY is because of the above requirement > traditionally taken as maximum value that can be represented in the data > type used to store commit's generation number _in memory_, but it can be > less. For timestamp_t the maximum value that can be represented > is (2 ^ 63 - 1). > > All right then. > > > Related to this, by the end of this series we are using GENERATION_NUMBER_MAX in just one place - compute_generation_numbers() to make sure the topological levels fit within 30 bits. Would it be more appropriate to rename GENERATION_NUMBER_MAX to GENERATION_NUMBER_V1_MAX (along the lines of GENERATION_NUMBER_V2_OFFSET_MAX) to correctly describe that is a limit on topological levels, rather than generation number value? > > The commit message says nothing about the new symbolic constant > GENERATION_NUMBER_V1_INFINITY, though. > > I'm not sure it is even needed (see comments below). Yes, you are correct. I tried it out with your suggestions and it wasn't really needed. Thanks for catching this! > ... Thanks - Abhishek
Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > On Fri, Aug 21, 2020 at 03:14:34PM +0200, Jakub Narębski wrote: >> "Abhishek Kumar via GitGitGadget" <gitgitgadget@gmail.com> writes: >> >>> From: Abhishek Kumar <abhishekkumar8222@gmail.com> >>> >>> In a preparatory step, let's return timestamp_t values from >>> commit_graph_generation(), use timestamp_t for local variables >> >> All right, this is all good. >> >>> and define GENERATION_NUMBER_INFINITY as (2 ^ 63 - 1) instead. >> >> This needs more detailed examination. There are two similar constants, >> GENERATION_NUMBER_INFINITY and GENERATION_NUMBER_MAX. The former is >> used for newest commits outside the commit-graph, while the latter is >> maximum number that commits in the commit-graph can have (because of the >> storage limitations). We therefore need GENERATION_NUMBER_INFINITY >> to be larger than GENERATION_NUMBER_MAX, and it is (and was). >> >> The GENERATION_NUMBER_INFINITY is because of the above requirement >> traditionally taken as maximum value that can be represented in the data >> type used to store commit's generation number _in memory_, but it can be >> less. For timestamp_t the maximum value that can be represented >> is (2 ^ 63 - 1). >> >> All right then. > > Related to this, by the end of this series we are using > GENERATION_NUMBER_MAX in just one place - compute_generation_numbers() > to make sure the topological levels fit within 30 bits. > > Would it be more appropriate to rename GENERATION_NUMBER_MAX to > GENERATION_NUMBER_V1_MAX (along the lines of > GENERATION_NUMBER_V2_OFFSET_MAX) to correctly describe that is a > limit on topological levels, rather than generation number value? Yes, I think that at the end of this patch series we should be using GENERATION_NUMBER_V1_MAX and GENERATION_NUMBER_V2_OFFSET_MAX to describe storage limits, and GENERATION_NUMBER_INFINITY (the latter as generation number value for commits not in graph). We need to ensure that both GENERATION_NUMBER_V1_MAX and GENERATION_NUMBER_V2_OFFSET_MAX are smaller than GENERATION_NUMBER_INFINITY. However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is difficult. As far as I can see, we can choose one of the *three* solutions (the third one is _new_): a. store 64-bit corrected commit date in the GDAT chunk all possible values are able to be stored, no need for GENERATION_NUMBER_V2_MAX, b. store 32-bit corrected commit date offset in the GDAT chunk, if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, do not write GDAT chunk at all (like for backward compatibility with mixed-version chains of split commit-graph layers), c. store 32-bit corrected commit date offset in the GDAT chunk, using some kind of overflow handling scheme; for example if the most significant bit of 32-bit value is 1, then the rest 31-bits are position in GDOV chunk, which uses 64-bit to store those corrected commit date offsets that do not fit in 32 bits. This type of schema is used in other places in Git code, if I remember it correctly. >> The commit message says nothing about the new symbolic constant >> GENERATION_NUMBER_V1_INFINITY, though. >> >> I'm not sure it is even needed (see comments below). > > Yes, you are correct. I tried it out with your suggestions and it wasn't > really needed. > > Thanks for catching this! Mistakes can happen when changig how the series is split into commits. Best,
On Tue, Aug 25, 2020 at 02:18:24PM +0200, Jakub Narębski wrote: > Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > > ... > > However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is > difficult. As far as I can see, we can choose one of the *three* > solutions (the third one is _new_): > > a. store 64-bit corrected commit date in the GDAT chunk > all possible values are able to be stored, no need for > GENERATION_NUMBER_V2_MAX, > > b. store 32-bit corrected commit date offset in the GDAT chunk, > if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, > do not write GDAT chunk at all (like for backward compatibility > with mixed-version chains of split commit-graph layers), > > c. store 32-bit corrected commit date offset in the GDAT chunk, > using some kind of overflow handling scheme; for example if > the most significant bit of 32-bit value is 1, then the > rest 31-bits are position in GDOV chunk, which uses 64-bit > to store those corrected commit date offsets that do not > fit in 32 bits. > Alright, so the third solution leverages the fact that in practice, very few offsets would overflow the 32-bit limit. Using 64-bits for all offsets would be wasteful, we can trade off a miniscule amount of computation to save large amounts of disk space. > > This type of schema is used in other places in Git code, if I remember > it correctly. > Yes, it's a similar idea to the extra edge list chunk, where the most significant bit of second parent indicates whether they are more than two parents. It's definitely feasible, albeit a little complex. What's the overall consensus on the third solution? > > >> The commit message says nothing about the new symbolic constant > >> GENERATION_NUMBER_V1_INFINITY, though. > >> > >> I'm not sure it is even needed (see comments below). > > > > Yes, you are correct. I tried it out with your suggestions and it wasn't > > really needed. > > > > Thanks for catching this! > > Mistakes can happen when changig how the series is split into commits. > > Best, > -- > Jakub Narębski Thanks - Abhishek
Hi, Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > On Tue, Aug 25, 2020 at 02:18:24PM +0200, Jakub Narębski wrote: >> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >> >> ... >> >> However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is >> difficult. As far as I can see, we can choose one of the *three* >> solutions (the third one is _new_): >> >> a. store 64-bit corrected commit date in the GDAT chunk >> all possible values are able to be stored, no need for >> GENERATION_NUMBER_V2_MAX, >> >> b. store 32-bit corrected commit date offset in the GDAT chunk, >> if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, >> do not write GDAT chunk at all (like for backward compatibility >> with mixed-version chains of split commit-graph layers), >> >> c. store 32-bit corrected commit date offset in the GDAT chunk, >> using some kind of overflow handling scheme; for example if >> the most significant bit of 32-bit value is 1, then the >> rest 31-bits are position in GDOV chunk, which uses 64-bit >> to store those corrected commit date offsets that do not >> fit in 32 bits. Note that I have posted more detailed analysis of advantages and disadvantages of each of the above solutions in response to 11/11 https://public-inbox.org/git/85o8mwb6nq.fsf@gmail.com/ I can think of yet another solution, a variant of approach 'c' with different overflow handling scheme: c'. Store 32-bit corrected commit date offset in the GDAT chunk, using the following overflow handling scheme: if the value is 0xFFFFFFFF (all bits set to 1, the maximum possible value for uint32_t), then the corrected commit date or corrected commit date offset can be found in GDOV chunk (Generation Data OVerflow handling). The GDOV chunk is composed of: - H bytes of commit OID, or 4 bytes (32 bits) of commit pos - 8 bytes (64 bits) of corrected commit date or its offset Commits in GDOV chunk are sorted; as we expect for the number of commits that require GDOV to be zero or a very small number there is no need for GDO Fanout chunk. - advantages: * commit-graph is smaller, increasing for abnormal repos * overflow limit reduced only by 1 (a single value) - disadvantages: * most complex code of all proposed solutions even more complicated than for solution 'c', different from EDGE chunk handling * tests would be needed to exercise the overflow code Or we can split overflow handling into two chunks: GDOI (Generation Data Overflow Index) and GDOV, where GDOI would be composed of H bytes of commit OID or 4 bytes of commit graph position (sorted), and GDOV would be composed oly of 8 bytes (64 bits) of corrected commit date data. This c'') variant has the same advantages and disadvantages as c'), with negligibly slightly larger disk size and possibly slightly better performance because of better data locality. > > Alright, so the third solution leverages the fact that in practice, > very few offsets would overflow the 32-bit limit. Using 64-bits for all > offsets would be wasteful, we can trade off a miniscule amount of > computation to save large amounts of disk space. On the other hand we can say that we can trade negligible increase of commit-graph disk space size (less than 7% in worst case: no octopus merges, no changed-path Bloom filter data, using SHA-1 for object ids, large repository so header size + OIFD is negligible) for simpler code with no need for overflow handling at all (and a minuscule amount of less computations). >> >> This type of schema is used in other places in Git code, if I remember >> it correctly. >> > > Yes, it's a similar idea to the extra edge list chunk, where the most > significant bit of second parent indicates whether they are more than > two parents. Yes and no. Yes, the solution 'c' uses exactly the same mechanism as the pointer from Commid Data chunk into Extra Edges List chunk: [...] If there are more than two parents, the second value has its most-significant bit on and the other bits store an array position into the Extra Edge List chunk. On the other hand we need to have some kind of overflow handling for the list of parents, as the number of parents is not limited in Git (there is no technical upper limit on the number of parents a commits can have), as opposed to for example Mercurial. This is not the case for storing corrected commit date (or corrected commit date offset), as 64 bits is all we would ever need. > It's definitely feasible, albeit a little complex. > > What's the overall consensus on the third solution? Still waiting for others to weight in. Best,
On Thu, Sep 03, 2020 at 03:42:43PM +0200, Jakub Narębski wrote: > Hi, > > Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > > On Tue, Aug 25, 2020 at 02:18:24PM +0200, Jakub Narębski wrote: > >> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > >> > >> ... > >> > >> However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is > >> difficult. As far as I can see, we can choose one of the *three* > >> solutions (the third one is _new_): > >> > >> a. store 64-bit corrected commit date in the GDAT chunk > >> all possible values are able to be stored, no need for > >> GENERATION_NUMBER_V2_MAX, > >> > >> b. store 32-bit corrected commit date offset in the GDAT chunk, > >> if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, > >> do not write GDAT chunk at all (like for backward compatibility > >> with mixed-version chains of split commit-graph layers), > >> > >> c. store 32-bit corrected commit date offset in the GDAT chunk, > >> using some kind of overflow handling scheme; for example if > >> the most significant bit of 32-bit value is 1, then the > >> rest 31-bits are position in GDOV chunk, which uses 64-bit > >> to store those corrected commit date offsets that do not > >> fit in 32 bits. > > Note that I have posted more detailed analysis of advantages and > disadvantages of each of the above solutions in response to 11/11 > https://public-inbox.org/git/85o8mwb6nq.fsf@gmail.com/ > > I can think of yet another solution, a variant of approach 'c' with > different overflow handling scheme: > > c'. Store 32-bit corrected commit date offset in the GDAT chunk, > using the following overflow handling scheme: if the value > is 0xFFFFFFFF (all bits set to 1, the maximum possible value > for uint32_t), then the corrected commit date or corrected > commit date offset can be found in GDOV chunk (Generation > Data OVerflow handling). > > The GDOV chunk is composed of: > - H bytes of commit OID, or 4 bytes (32 bits) of commit pos > - 8 bytes (64 bits) of corrected commit date or its offset > > Commits in GDOV chunk are sorted; as we expect for the number > of commits that require GDOV to be zero or a very small number > there is no need for GDO Fanout chunk. > > - advantages: > * commit-graph is smaller, increasing for abnormal repos > * overflow limit reduced only by 1 (a single value) > - disadvantages: > * most complex code of all proposed solutions > even more complicated than for solution 'c', > different from EDGE chunk handling > * tests would be needed to exercise the overflow code > > Or we can split overflow handling into two chunks: GDOI (Generation Data > Overflow Index) and GDOV, where GDOI would be composed of H bytes of > commit OID or 4 bytes of commit graph position (sorted), and GDOV would > be composed oly of 8 bytes (64 bits) of corrected commit date data. > > This c'') variant has the same advantages and disadvantages as c'), with > negligibly slightly larger disk size and possibly slightly better > performance because of better data locality. > The primary benefit of c') over c) seems to the range of valid offsets - c') can range from [0, 0xFFFFFFFF) whereas offsets for c) can range betwen [0, 0x7FFFFFF]. In other words, we should prefer c') over c) only if the offsets are usually in the range [0x7FFFFFFF + 1, 0xFFFFFFFF) Commits were overflowing corrected committer date offsets are rare, and offsets in that particular range are doubly rare. To be wrong within the range would be have an offset of 68 to 136 years, so that's possible only if the corrupted timestamp is in future (it's been 50.68 years since Unix epoch 0 so far). Thikning back to the linux repository, the largest offset was around of the order of 2 ^ 25 (offset of 1.06 years) and I would assume that holds Overall, I don't think the added complexity (compared to c) approach) makes up for by greater versatility. [1]: https://lore.kernel.org/git/20200703082842.GA28027@Abhishek-Arch/ Thanks - Abhishek
Hello, Abhishek Kumar <abhishekkumar8222@gmail.com> writes: > On Thu, Sep 03, 2020 at 03:42:43PM +0200, Jakub Narębski wrote: >> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >>> On Tue, Aug 25, 2020 at 02:18:24PM +0200, Jakub Narębski wrote: >>>> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >>>> >>>> ... >>>> >>>> However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is >>>> difficult. As far as I can see, we can choose one of the *three* >>>> solutions (the third one is _new_): >>>> >>>> a. store 64-bit corrected commit date in the GDAT chunk >>>> all possible values are able to be stored, no need for >>>> GENERATION_NUMBER_V2_MAX, >>>> >>>> b. store 32-bit corrected commit date offset in the GDAT chunk, >>>> if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, >>>> do not write GDAT chunk at all (like for backward compatibility >>>> with mixed-version chains of split commit-graph layers), >>>> >>>> c. store 32-bit corrected commit date offset in the GDAT chunk, >>>> using some kind of overflow handling scheme; for example if >>>> the most significant bit of 32-bit value is 1, then the >>>> rest 31-bits are position in GDOV chunk, which uses 64-bit >>>> to store those corrected commit date offsets that do not >>>> fit in 32 bits. >> >> Note that I have posted more detailed analysis of advantages and >> disadvantages of each of the above solutions in response to 11/11 >> https://public-inbox.org/git/85o8mwb6nq.fsf@gmail.com/ >> >> I can think of yet another solution, a variant of approach 'c' with >> different overflow handling scheme: >> >> c'. Store 32-bit corrected commit date offset in the GDAT chunk, >> using the following overflow handling scheme: if the value >> is 0xFFFFFFFF (all bits set to 1, the maximum possible value >> for uint32_t), then the corrected commit date or corrected >> commit date offset can be found in GDOV chunk (Generation >> Data OVerflow handling). >> >> The GDOV chunk is composed of: >> - H bytes of commit OID, or 4 bytes (32 bits) of commit pos >> - 8 bytes (64 bits) of corrected commit date or its offset >> >> Commits in GDOV chunk are sorted; as we expect for the number >> of commits that require GDOV to be zero or a very small number >> there is no need for GDO Fanout chunk. >> >> - advantages: >> * commit-graph is smaller, increasing for abnormal repos >> * overflow limit reduced only by 1 (a single value) >> - disadvantages: >> * most complex code of all proposed solutions >> even more complicated than for solution 'c', >> different from EDGE chunk handling >> * tests would be needed to exercise the overflow code >> >> Or we can split overflow handling into two chunks: GDOI (Generation Data >> Overflow Index) and GDOV, where GDOI would be composed of H bytes of >> commit OID or 4 bytes of commit graph position (sorted), and GDOV would >> be composed oly of 8 bytes (64 bits) of corrected commit date data. >> >> This c'') variant has the same advantages and disadvantages as c'), with >> negligibly slightly larger disk size and possibly slightly better >> performance because of better data locality. >> > > The primary benefit of c') over c) seems to the range of valid offsets - > c') can range from [0, 0xFFFFFFFF) whereas offsets for c) can range > betwen [0, 0x7FFFFFF]. > > In other words, we should prefer c') over c) only if the offsets are > usually in the range [0x7FFFFFFF + 1, 0xFFFFFFFF) > > Commits were overflowing corrected committer date offsets are rare, and > offsets in that particular range are doubly rare. To be wrong within > the range would be have an offset of 68 to 136 years, so that's possible > only if the corrupted timestamp is in future (it's been 50.68 years since > Unix epoch 0 so far). Right, the c) variant has the same limitation as if corrected commit date offsets were stored as signed 32-bit integer (int32_t), so to have overflow we would have date post Y2k38 followed by date of Unix epoch 0. Very unlikely. > > Thinking back to the linux repository, the largest offset was around of > the order of 2 ^ 25 (offset of 1.06 years) and I would assume that holds > > Overall, I don't think the added complexity (compared to c) approach) > makes up for by greater versatility. > > [1]: https://lore.kernel.org/git/20200703082842.GA28027@Abhishek-Arch/ All right. With variant c) we have additional advantage in that we can pattern the code on the code for EDGE chunk handling, as you said. I wanted to warn about the need for sanity checking, like ensuring that we have GDOV chunk and that it is large enough -- but it turns out that we skip this bounds checking for extra edges / EDGE chunk: if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) { pptr = insert_parent_or_die(r, g, edge_value, pptr); return 1; } parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK)); do { edge_value = get_be32(parent_data_ptr); pptr = insert_parent_or_die(r, g, edge_value & GRAPH_EDGE_LAST_MASK, pptr); parent_data_ptr++; } while (!(edge_value & GRAPH_LAST_EDGE)); Best,
Hello, Jakub Narębski <jnareb@gmail.com> writes: > Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >> On Thu, Sep 03, 2020 at 03:42:43PM +0200, Jakub Narębski wrote: >>> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >>>> On Tue, Aug 25, 2020 at 02:18:24PM +0200, Jakub Narębski wrote: >>>>> Abhishek Kumar <abhishekkumar8222@gmail.com> writes: >>>>> >>>>> ... >>>>> >>>>> However, as I wrote, handling GENERATION_NUMBER_V2_OFFSET_MAX is >>>>> difficult. As far as I can see, we can choose one of the *three* >>>>> solutions (the third one is _new_): >>>>> >>>>> a. store 64-bit corrected commit date in the GDAT chunk >>>>> all possible values are able to be stored, no need for >>>>> GENERATION_NUMBER_V2_MAX, >>>>> >>>>> b. store 32-bit corrected commit date offset in the GDAT chunk, >>>>> if its value is larger than GENERATION_NUMBER_V2_OFFSET_MAX, >>>>> do not write GDAT chunk at all (like for backward compatibility >>>>> with mixed-version chains of split commit-graph layers), >>>>> >>>>> c. store 32-bit corrected commit date offset in the GDAT chunk, >>>>> using some kind of overflow handling scheme; for example if >>>>> the most significant bit of 32-bit value is 1, then the >>>>> rest 31-bits are position in GDOV chunk, which uses 64-bit >>>>> to store those corrected commit date offsets that do not >>>>> fit in 32 bits. >>> >>> Note that I have posted more detailed analysis of advantages and >>> disadvantages of each of the above solutions in response to 11/11 >>> https://public-inbox.org/git/85o8mwb6nq.fsf@gmail.com/ >>> >>> I can think of yet another solution, a variant of approach 'c' [...] >> >> The primary benefit of c') over c) seems to the range of valid offsets - >> c') can range from [0, 0xFFFFFFFF) whereas offsets for c) can range >> betwen [0, 0x7FFFFFF]. >> >> In other words, we should prefer c') over c) only if the offsets are >> usually in the range [0x7FFFFFFF + 1, 0xFFFFFFFF) >> >> Commits were overflowing corrected committer date offsets are rare, and >> offsets in that particular range are doubly rare. To be wrong within >> the range would be have an offset of 68 to 136 years, so that's possible >> only if the corrupted timestamp is in future (it's been 50.68 years since >> Unix epoch 0 so far). > > Right, the c) variant has the same limitation as if corrected commit > date offsets were stored as signed 32-bit integer (int32_t), so to have > overflow we would have date post Y2k38 followed by date of Unix epoch 0. > Very unlikely. > >> Thinking back to the linux repository, the largest offset was around of >> the order of 2 ^ 25 (offset of 1.06 years) and I would assume that holds >> >> Overall, I don't think the added complexity (compared to c) approach) >> makes up for by greater versatility. >> >> [1]: https://lore.kernel.org/git/20200703082842.GA28027@Abhishek-Arch/ > > All right. > > With variant c) we have additional advantage in that we can pattern the > code on the code for EDGE chunk handling, as you said. > > I wanted to warn about the need for sanity checking, like ensuring that > we have GDOV chunk and that it is large enough -- but it turns out that > we skip this bounds checking for extra edges / EDGE chunk: > > if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) { > pptr = insert_parent_or_die(r, g, edge_value, pptr); > return 1; > } > > parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + > 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK)); > do { > edge_value = get_be32(parent_data_ptr); > pptr = insert_parent_or_die(r, g, > edge_value & GRAPH_EDGE_LAST_MASK, > pptr); > parent_data_ptr++; > } while (!(edge_value & GRAPH_LAST_EDGE)); Both Taylor Blau and Junio C Hamano agree that it is better to store corrected commit date offsets and have overload handling to halve (to reduce by 50%) the size of the GDAT chunk. I have not heard from Derrick Stolee. It looks then that it is the way to go; as I said that you have convinced me that variant 'c' (EDGE-like) is the best solution for overflow handling. Best,
On Mon, Sep 28, 2020 at 11:48:05PM +0200, Jakub Narębski wrote: > Hello, > > > Both Taylor Blau and Junio C Hamano agree that it is better to store > corrected commit date offsets and have overload handling to halve (to > reduce by 50%) the size of the GDAT chunk. I have not heard from > Derrick Stolee. > > It looks then that it is the way to go; as I said that you have > convinced me that variant 'c' (EDGE-like) is the best solution for > overflow handling. > Great! So I have implemented the variant 'c' and I am unsure whether my tests are exhaustive enough. Can you preview the commit "commit-graph: implement generation data chunk" [1] on the pull request?. Apart from that, I am ready to publish the v4 to the mailing list. [1]: https://github.com/gitgitgadget/git/pull/676/commits/390973da1d744cbb8a08a3b99c991f6d04ae9baf > Best, > -- > Jakub Narębski
diff --git a/commit-graph.c b/commit-graph.c index fb6e2bf18f..7f9f858577 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -99,7 +99,7 @@ uint32_t commit_graph_position(const struct commit *c) return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH; } -uint32_t commit_graph_generation(const struct commit *c) +timestamp_t commit_graph_generation(const struct commit *c) { struct commit_graph_data *data = commit_graph_data_slab_peek(&commit_graph_data_slab, c); @@ -115,8 +115,8 @@ uint32_t commit_graph_generation(const struct commit *c) int compare_commits_by_gen(const void *_a, const void *_b) { const struct commit *a = _a, *b = _b; - const uint32_t generation_a = commit_graph_generation(a); - const uint32_t generation_b = commit_graph_generation(b); + const timestamp_t generation_a = commit_graph_generation(a); + const timestamp_t generation_b = commit_graph_generation(b); /* older commits first */ if (generation_a < generation_b) @@ -159,8 +159,8 @@ static int commit_gen_cmp(const void *va, const void *vb) const struct commit *a = *(const struct commit **)va; const struct commit *b = *(const struct commit **)vb; - uint32_t generation_a = commit_graph_data_at(a)->generation; - uint32_t generation_b = commit_graph_data_at(b)->generation; + const timestamp_t generation_a = commit_graph_data_at(a)->generation; + const timestamp_t generation_b = commit_graph_data_at(b)->generation; /* lower generation commits first */ if (generation_a < generation_b) return -1; @@ -1338,7 +1338,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; display_progress(ctx->progress, i + 1); - if (generation != GENERATION_NUMBER_INFINITY && + if (generation != GENERATION_NUMBER_V1_INFINITY && generation != GENERATION_NUMBER_ZERO) continue; @@ -1352,7 +1352,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) for (parent = current->parents; parent; parent = parent->next) { generation = commit_graph_data_at(parent->item)->generation; - if (generation == GENERATION_NUMBER_INFINITY || + if (generation == GENERATION_NUMBER_V1_INFINITY || generation == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); @@ -2355,8 +2355,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit, *odb_commit; struct commit_list *graph_parents, *odb_parents; - uint32_t max_generation = 0; - uint32_t generation; + timestamp_t max_generation = 0; + timestamp_t generation; display_progress(progress, i + 1); hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); diff --git a/commit-graph.h b/commit-graph.h index 701e3d41aa..430bc830bb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -138,13 +138,13 @@ void disable_commit_graph(struct repository *r); struct commit_graph_data { uint32_t graph_pos; - uint32_t generation; + timestamp_t generation; }; /* * Commits should be parsed before accessing generation, graph positions. */ -uint32_t commit_graph_generation(const struct commit *); +timestamp_t commit_graph_generation(const struct commit *); uint32_t commit_graph_position(const struct commit *); int compare_commits_by_gen(const void *_a, const void *_b); diff --git a/commit-reach.c b/commit-reach.c index c83cc291e7..470bc80139 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -32,12 +32,12 @@ static int queue_has_nonstale(struct prio_queue *queue) static struct commit_list *paint_down_to_common(struct repository *r, struct commit *one, int n, struct commit **twos, - int min_generation) + timestamp_t min_generation) { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; - uint32_t last_gen = GENERATION_NUMBER_INFINITY; + timestamp_t last_gen = GENERATION_NUMBER_INFINITY; if (!min_generation) queue.compare = compare_commits_by_commit_date; @@ -58,10 +58,10 @@ static struct commit_list *paint_down_to_common(struct repository *r, struct commit *commit = prio_queue_get(&queue); struct commit_list *parents; int flags; - uint32_t generation = commit_graph_generation(commit); + timestamp_t generation = commit_graph_generation(commit); if (min_generation && generation > last_gen) - BUG("bad generation skip %8x > %8x at %s", + BUG("bad generation skip %"PRItime" > %"PRItime" at %s", generation, last_gen, oid_to_hex(&commit->object.oid)); last_gen = generation; @@ -177,12 +177,12 @@ static int remove_redundant(struct repository *r, struct commit **array, int cnt repo_parse_commit(r, array[i]); for (i = 0; i < cnt; i++) { struct commit_list *common; - uint32_t min_generation = commit_graph_generation(array[i]); + timestamp_t min_generation = commit_graph_generation(array[i]); if (redundant[i]) continue; for (j = filled = 0; j < cnt; j++) { - uint32_t curr_generation; + timestamp_t curr_generation; if (i == j || redundant[j]) continue; filled_index[filled] = j; @@ -321,7 +321,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, { struct commit_list *bases; int ret = 0, i; - uint32_t generation, min_generation = GENERATION_NUMBER_INFINITY; + timestamp_t generation, min_generation = GENERATION_NUMBER_INFINITY; if (repo_parse_commit(r, commit)) return ret; @@ -470,7 +470,7 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, struct contains_cache *cache, - uint32_t cutoff) + timestamp_t cutoff) { enum contains_result *cached = contains_cache_at(cache, candidate); @@ -506,11 +506,11 @@ static enum contains_result contains_tag_algo(struct commit *candidate, { struct contains_stack contains_stack = { 0, 0, NULL }; enum contains_result result; - uint32_t cutoff = GENERATION_NUMBER_INFINITY; + timestamp_t cutoff = GENERATION_NUMBER_INFINITY; const struct commit_list *p; for (p = want; p; p = p->next) { - uint32_t generation; + timestamp_t generation; struct commit *c = p->item; load_commit_graph_info(the_repository, c); generation = commit_graph_generation(c); @@ -565,7 +565,7 @@ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, time_t min_commit_date, - uint32_t min_generation) + timestamp_t min_generation) { struct commit **list = NULL; int i; @@ -666,13 +666,13 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; struct commit_list *from_iter = from, *to_iter = to; int result; - uint32_t min_generation = GENERATION_NUMBER_INFINITY; + timestamp_t min_generation = GENERATION_NUMBER_INFINITY; while (from_iter) { add_object_array(&from_iter->item->object, NULL, &from_objs); if (!parse_commit(from_iter->item)) { - uint32_t generation; + timestamp_t generation; if (from_iter->item->date < min_commit_date) min_commit_date = from_iter->item->date; @@ -686,7 +686,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, while (to_iter) { if (!parse_commit(to_iter->item)) { - uint32_t generation; + timestamp_t generation; if (to_iter->item->date < min_commit_date) min_commit_date = to_iter->item->date; @@ -726,13 +726,13 @@ struct commit_list *get_reachable_subset(struct commit **from, int nr_from, struct commit_list *found_commits = NULL; struct commit **to_last = to + nr_to; struct commit **from_last = from + nr_from; - uint32_t min_generation = GENERATION_NUMBER_INFINITY; + timestamp_t min_generation = GENERATION_NUMBER_INFINITY; int num_to_find = 0; struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; for (item = to; item < to_last; item++) { - uint32_t generation; + timestamp_t generation; struct commit *c = *item; parse_commit(c); diff --git a/commit-reach.h b/commit-reach.h index b49ad71a31..148b56fea5 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -87,7 +87,7 @@ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, time_t min_commit_date, - uint32_t min_generation); + timestamp_t min_generation); int can_all_from_reach(struct commit_list *from, struct commit_list *to, int commit_date_cutoff); diff --git a/commit.h b/commit.h index e901538909..bc0732a4fe 100644 --- a/commit.h +++ b/commit.h @@ -11,7 +11,8 @@ #include "commit-slab.h" #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF -#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF +#define GENERATION_NUMBER_INFINITY ((1ULL << 63) - 1) +#define GENERATION_NUMBER_V1_INFINITY 0xFFFFFFFF #define GENERATION_NUMBER_MAX 0x3FFFFFFF #define GENERATION_NUMBER_ZERO 0 diff --git a/revision.c b/revision.c index ecf757c327..411852468b 100644 --- a/revision.c +++ b/revision.c @@ -3290,7 +3290,7 @@ define_commit_slab(indegree_slab, int); define_commit_slab(author_date_slab, timestamp_t); struct topo_walk_info { - uint32_t min_generation; + timestamp_t min_generation; struct prio_queue explore_queue; struct prio_queue indegree_queue; struct prio_queue topo_queue; @@ -3336,7 +3336,7 @@ static void explore_walk_step(struct rev_info *revs) } static void explore_to_depth(struct rev_info *revs, - uint32_t gen_cutoff) + timestamp_t gen_cutoff) { struct topo_walk_info *info = revs->topo_walk_info; struct commit *c; @@ -3379,7 +3379,7 @@ static void indegree_walk_step(struct rev_info *revs) } static void compute_indegrees_to_depth(struct rev_info *revs, - uint32_t gen_cutoff) + timestamp_t gen_cutoff) { struct topo_walk_info *info = revs->topo_walk_info; struct commit *c; @@ -3437,7 +3437,7 @@ static void init_topo_walk(struct rev_info *revs) info->min_generation = GENERATION_NUMBER_INFINITY; for (list = revs->commits; list; list = list->next) { struct commit *c = list->item; - uint32_t generation; + timestamp_t generation; if (parse_commit_gently(c, 1)) continue; @@ -3498,7 +3498,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit) for (p = commit->parents; p; p = p->next) { struct commit *parent = p->item; int *pi; - uint32_t generation; + timestamp_t generation; if (parent->object.flags & UNINTERESTING) continue; diff --git a/upload-pack.c b/upload-pack.c index 80ad9a38d8..bcb8b5dfda 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -497,7 +497,7 @@ static int got_oid(struct upload_pack_data *data, static int ok_to_give_up(struct upload_pack_data *data) { - uint32_t min_generation = GENERATION_NUMBER_ZERO; + timestamp_t min_generation = GENERATION_NUMBER_ZERO; if (!data->have_obj.nr) return 0;