diff mbox series

[v5,07/11] commit-graph: implement corrected commit date

Message ID 859c39eff52e32ad322969d024184971acec82e7.1609154168.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Implement Corrected Commit Date | expand

Commit Message

Abhishek Kumar Dec. 28, 2020, 11:16 a.m. UTC
From: Abhishek Kumar <abhishekkumar8222@gmail.com>

With most of preparations done, let's implement corrected commit date.

The corrected commit date for a commit is defined as:

* A commit with no parents (a root commit) has corrected commit date
  equal to its committer date.
* A commit with at least one parent has corrected commit date equal to
  the maximum of its commit date and one more than the largest corrected
  commit date among its parents.

As a special case, a root commit with timestamp of zero (01.01.1970
00:00:00Z) has corrected commit date of one, to be able to distinguish
from GENERATION_NUMBER_ZERO (that is, an uncomputed corrected commit
date).

To minimize the space required to store corrected commit date, Git
stores corrected commit date offsets into the commit-graph file. The
corrected commit date offset for a commit is defined as the difference
between its corrected commit date and actual commit date.

Storing corrected commit date requires sizeof(timestamp_t) bytes, which
in most cases is 64 bits (uintmax_t). However, corrected commit date
offsets can be safely stored using only 32-bits. This halves the size
of GDAT chunk, which is a reduction of around 6% in the size of
commit-graph file.

However, using offsets be problematic if one of commits is malformed but
valid and has committerdate of 0 Unix time, as the offset would be the
same as corrected commit date and thus require 64-bits to be stored
properly.

While Git does not write out offsets at this stage, Git stores the
corrected commit dates in member generation of struct commit_graph_data.
It will begin writing commit date offsets with the introduction of
generation data chunk.

Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
---
 commit-graph.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Comments

Derrick Stolee Dec. 30, 2020, 1:53 a.m. UTC | #1
On 12/28/2020 6:16 AM, Abhishek Kumar via GitGitGadget wrote:
> From: Abhishek Kumar <abhishekkumar8222@gmail.com>
> 
> With most of preparations done, let's implement corrected commit date.
> 
> The corrected commit date for a commit is defined as:
> 
> * A commit with no parents (a root commit) has corrected commit date
>   equal to its committer date.
> * A commit with at least one parent has corrected commit date equal to
>   the maximum of its commit date and one more than the largest corrected
>   commit date among its parents.
> 
> As a special case, a root commit with timestamp of zero (01.01.1970
> 00:00:00Z) has corrected commit date of one, to be able to distinguish
> from GENERATION_NUMBER_ZERO (that is, an uncomputed corrected commit
> date).
> 
> To minimize the space required to store corrected commit date, Git
> stores corrected commit date offsets into the commit-graph file. The
> corrected commit date offset for a commit is defined as the difference
> between its corrected commit date and actual commit date.
> 
> Storing corrected commit date requires sizeof(timestamp_t) bytes, which
> in most cases is 64 bits (uintmax_t). However, corrected commit date
> offsets can be safely stored using only 32-bits. This halves the size
> of GDAT chunk, which is a reduction of around 6% in the size of
> commit-graph file.
> 
> However, using offsets be problematic if one of commits is malformed but

However, using 32-bit offsets is problematic if a commit is malformed...

> valid and has committerdate of 0 Unix time, as the offset would be the

s/committerdate/committer date/

> same as corrected commit date and thus require 64-bits to be stored
> properly.
> 
> While Git does not write out offsets at this stage, Git stores the
> corrected commit dates in member generation of struct commit_graph_data.
> It will begin writing commit date offsets with the introduction of
> generation data chunk.
> 
> Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
> ---
>  commit-graph.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/commit-graph.c b/commit-graph.c
> index 1b2a015f92f..bfc3aae5f93 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -1339,9 +1339,11 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
>  					ctx->commits.nr);
>  	for (i = 0; i < ctx->commits.nr; i++) {
>  		uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
> +		timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation;
>  
>  		display_progress(ctx->progress, i + 1);
> -		if (level != GENERATION_NUMBER_ZERO)
> +		if (level != GENERATION_NUMBER_ZERO &&
> +		    corrected_commit_date != GENERATION_NUMBER_ZERO)
>  			continue;
>  
>  		commit_list_insert(ctx->commits.list[i], &list);
> @@ -1350,16 +1352,23 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
>  			struct commit_list *parent;
>  			int all_parents_computed = 1;
>  			uint32_t max_level = 0;
> +			timestamp_t max_corrected_commit_date = 0;
>  
>  			for (parent = current->parents; parent; parent = parent->next) {
>  				level = *topo_level_slab_at(ctx->topo_levels, parent->item);
> +				corrected_commit_date = commit_graph_data_at(parent->item)->generation;
>  
> -				if (level == GENERATION_NUMBER_ZERO) {
> +				if (level == GENERATION_NUMBER_ZERO ||
> +				    corrected_commit_date == GENERATION_NUMBER_ZERO) {
>  					all_parents_computed = 0;
>  					commit_list_insert(parent->item, &list);
>  					break;
> -				} else if (level > max_level) {
> -					max_level = level;
> +				} else {
> +					if (level > max_level)
> +						max_level = level;
> +
> +					if (corrected_commit_date > max_corrected_commit_date)
> +						max_corrected_commit_date = corrected_commit_date;

nit: the "break" in the first case makes it so this large else block
is unnecessary. 

-				if (level == GENERATION_NUMBER_ZERO) {
+				if (level == GENERATION_NUMBER_ZERO ||
+				    corrected_commit_date == GENERATION_NUMBER_ZERO) {
 					all_parents_computed = 0;
 					commit_list_insert(parent->item, &list);
 					break;
-				} else if (level > max_level) {
-					max_level = level;
+				
+				if (level > max_level)
+					max_level = level;
+
+				if (corrected_commit_date > max_corrected_commit_date)
+					max_corrected_commit_date = corrected_commit_date;
-				}
 			}

Thanks,
-Stolee
Abhishek Kumar Jan. 10, 2021, 12:21 p.m. UTC | #2
On Tue, Dec 29, 2020 at 08:53:11PM -0500, Derrick Stolee wrote:
> On 12/28/2020 6:16 AM, Abhishek Kumar via GitGitGadget wrote:
> > From: Abhishek Kumar <abhishekkumar8222@gmail.com>
> > 
> > With most of preparations done, let's implement corrected commit date.
> > 
> > The corrected commit date for a commit is defined as:
> > 
> > * A commit with no parents (a root commit) has corrected commit date
> >   equal to its committer date.
> > * A commit with at least one parent has corrected commit date equal to
> >   the maximum of its commit date and one more than the largest corrected
> >   commit date among its parents.
> > 
> > As a special case, a root commit with timestamp of zero (01.01.1970
> > 00:00:00Z) has corrected commit date of one, to be able to distinguish
> > from GENERATION_NUMBER_ZERO (that is, an uncomputed corrected commit
> > date).
> > 
> > To minimize the space required to store corrected commit date, Git
> > stores corrected commit date offsets into the commit-graph file. The
> > corrected commit date offset for a commit is defined as the difference
> > between its corrected commit date and actual commit date.
> > 
> > Storing corrected commit date requires sizeof(timestamp_t) bytes, which
> > in most cases is 64 bits (uintmax_t). However, corrected commit date
> > offsets can be safely stored using only 32-bits. This halves the size
> > of GDAT chunk, which is a reduction of around 6% in the size of
> > commit-graph file.
> > 
> > However, using offsets be problematic if one of commits is malformed but
> 
> However, using 32-bit offsets is problematic if a commit is malformed...
> 
> > valid and has committerdate of 0 Unix time, as the offset would be the
> 
> s/committerdate/committer date/
> 
> > same as corrected commit date and thus require 64-bits to be stored
> > properly.
> > 
> > While Git does not write out offsets at this stage, Git stores the
> > corrected commit dates in member generation of struct commit_graph_data.
> > It will begin writing commit date offsets with the introduction of
> > generation data chunk.
> > 
> > Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
> > ---
> >  commit-graph.c | 21 +++++++++++++++++----
> >  1 file changed, 17 insertions(+), 4 deletions(-)
> > 
> > diff --git a/commit-graph.c b/commit-graph.c
> > index 1b2a015f92f..bfc3aae5f93 100644
> > --- a/commit-graph.c
> > +++ b/commit-graph.c
> > @@ -1339,9 +1339,11 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
> >  					ctx->commits.nr);
> >  	for (i = 0; i < ctx->commits.nr; i++) {
> >  		uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
> > +		timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation;
> >  
> >  		display_progress(ctx->progress, i + 1);
> > -		if (level != GENERATION_NUMBER_ZERO)
> > +		if (level != GENERATION_NUMBER_ZERO &&
> > +		    corrected_commit_date != GENERATION_NUMBER_ZERO)
> >  			continue;
> >  
> >  		commit_list_insert(ctx->commits.list[i], &list);
> > @@ -1350,16 +1352,23 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
> >  			struct commit_list *parent;
> >  			int all_parents_computed = 1;
> >  			uint32_t max_level = 0;
> > +			timestamp_t max_corrected_commit_date = 0;
> >  
> >  			for (parent = current->parents; parent; parent = parent->next) {
> >  				level = *topo_level_slab_at(ctx->topo_levels, parent->item);
> > +				corrected_commit_date = commit_graph_data_at(parent->item)->generation;
> >  
> > -				if (level == GENERATION_NUMBER_ZERO) {
> > +				if (level == GENERATION_NUMBER_ZERO ||
> > +				    corrected_commit_date == GENERATION_NUMBER_ZERO) {
> >  					all_parents_computed = 0;
> >  					commit_list_insert(parent->item, &list);
> >  					break;
> > -				} else if (level > max_level) {
> > -					max_level = level;
> > +				} else {
> > +					if (level > max_level)
> > +						max_level = level;
> > +
> > +					if (corrected_commit_date > max_corrected_commit_date)
> > +						max_corrected_commit_date = corrected_commit_date;
> 
> nit: the "break" in the first case makes it so this large else block
> is unnecessary. 

Thanks, removed.

> 
> -				if (level == GENERATION_NUMBER_ZERO) {
> +				if (level == GENERATION_NUMBER_ZERO ||
> +				    corrected_commit_date == GENERATION_NUMBER_ZERO) {
>  					all_parents_computed = 0;
>  					commit_list_insert(parent->item, &list);
>  					break;
> -				} else if (level > max_level) {
> -					max_level = level;
> +				
> +				if (level > max_level)
> +					max_level = level;
> +
> +				if (corrected_commit_date > max_corrected_commit_date)
> +					max_corrected_commit_date = corrected_commit_date;
> -				}
>  			}
> 
> Thanks,
> -Stolee
> 

Thanks
- Abhishek
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index 1b2a015f92f..bfc3aae5f93 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1339,9 +1339,11 @@  static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 					ctx->commits.nr);
 	for (i = 0; i < ctx->commits.nr; i++) {
 		uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
+		timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation;
 
 		display_progress(ctx->progress, i + 1);
-		if (level != GENERATION_NUMBER_ZERO)
+		if (level != GENERATION_NUMBER_ZERO &&
+		    corrected_commit_date != GENERATION_NUMBER_ZERO)
 			continue;
 
 		commit_list_insert(ctx->commits.list[i], &list);
@@ -1350,16 +1352,23 @@  static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 			struct commit_list *parent;
 			int all_parents_computed = 1;
 			uint32_t max_level = 0;
+			timestamp_t max_corrected_commit_date = 0;
 
 			for (parent = current->parents; parent; parent = parent->next) {
 				level = *topo_level_slab_at(ctx->topo_levels, parent->item);
+				corrected_commit_date = commit_graph_data_at(parent->item)->generation;
 
-				if (level == GENERATION_NUMBER_ZERO) {
+				if (level == GENERATION_NUMBER_ZERO ||
+				    corrected_commit_date == GENERATION_NUMBER_ZERO) {
 					all_parents_computed = 0;
 					commit_list_insert(parent->item, &list);
 					break;
-				} else if (level > max_level) {
-					max_level = level;
+				} else {
+					if (level > max_level)
+						max_level = level;
+
+					if (corrected_commit_date > max_corrected_commit_date)
+						max_corrected_commit_date = corrected_commit_date;
 				}
 			}
 
@@ -1369,6 +1378,10 @@  static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 				if (max_level > GENERATION_NUMBER_V1_MAX - 1)
 					max_level = GENERATION_NUMBER_V1_MAX - 1;
 				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
+
+				if (current->date && current->date > max_corrected_commit_date)
+					max_corrected_commit_date = current->date - 1;
+				commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
 			}
 		}
 	}