diff mbox series

[7/7] commit-graph.c: introduce '--[no-]check-oids'

Message ID 1ff42f4c3d568dd25889d2808cda3edf38a36cb9.1586836700.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series commit-graph: split strategies, '--[no-]check-oids' | expand

Commit Message

Taylor Blau April 14, 2020, 4:04 a.m. UTC
When operating on a stream of commit OIDs on stdin, 'git commit-graph
write' checks that each OID refers to an object that is indeed a commit.
This is convenient to make sure that the given input is well-formed, but
can sometimes be undesirable.

For example, server operators may wish to feed the refnames that were
updated during a push to 'git commit-graph write --input=stdin-commits',
and silently discard refs that don't point at commits. This can be done
by combing the output of 'git for-each-ref' with '--format
%(*objecttype)', but this requires opening up a potentially large number
of objects.  Instead, it is more convenient to feed the updated refs to
the commit-graph machinery, and let it throw out refs that don't point
to commits.

Introduce '--[no-]check-oids' to make such a behavior possible. With
'--check-oids' (the default behavior to retain backwards compatibility),
'git commit-graph write' will barf on a non-commit line in its input.
With 'no-check-oids', such lines will be silently ignored, making the
above possible by specifying this option.

No matter which is supplied, 'git commit-graph write' retains the
behavior from the previous commit of rejecting non-OID inputs like
"HEAD" and "refs/heads/foo" as before.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-commit-graph.txt |  5 +++++
 builtin/commit-graph.c             | 11 ++++++++---
 t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

Comments

Taylor Blau April 15, 2020, 4:29 a.m. UTC | #1
Whoops. I sent the wrong version of this patch. It should be the below:

--- >8 ---

Subject: [PATCH] shallow.c: use 'reset_repository_shallow' when appropriate

In bd0b42aed3 (fetch-pack: do not take shallow lock unnecessarily,
2019-01-10), the author noted that 'is_repository_shallow' produces
visible side-effect(s) by setting 'is_shallow' and 'shallow_stat'.

This is a problem for e.g., fetching with '--update-shallow' in a
shallow repsoitory with 'fetch.writeCommitGraph' enabled, since the
update to '.git/shallow' will cause Git to think that the repository
*isn't* shallow when it is, thereby circumventing the commit-graph
compatability check.

This causes problems in shallow repositories with at least shallow refs
that have at least one ancestor (since the client won't have those
object(s), and therefore can't take the reachability closure over
commits to be written to the commit-graph).

Address this by introducing 'reset_repository_shallow()', and calling it
when the shallow file is updated, forcing 'is_repository_shallow' to
re-evaluate whether the repository is still shallow after fetching in
the above scenario.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 commit.h     |  1 +
 fetch-pack.c |  1 +
 shallow.c    | 15 ++++++++-------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/commit.h b/commit.h
index 008a0fa4a0..ee1ba139d4 100644
--- a/commit.h
+++ b/commit.h
@@ -251,6 +251,7 @@ int register_shallow(struct repository *r, const struct object_id *oid);
 int unregister_shallow(const struct object_id *oid);
 int for_each_commit_graft(each_commit_graft_fn, void *);
 int is_repository_shallow(struct repository *r);
+void reset_repository_shallow(struct repository *r);
 struct commit_list *get_shallow_commits(struct object_array *heads,
 					int depth, int shallow_flag, int not_shallow_flag);
 struct commit_list *get_shallow_commits_by_rev_list(
diff --git a/fetch-pack.c b/fetch-pack.c
index 1734a573b0..051902ef6d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1630,6 +1630,7 @@ static void update_shallow(struct fetch_pack_args *args,
 		if (*alternate_shallow_file == '\0') { /* --unshallow */
 			unlink_or_warn(git_path_shallow(the_repository));
 			rollback_lock_file(&shallow_lock);
+			reset_repository_shallow(the_repository);
 		} else
 			commit_lock_file(&shallow_lock);
 		alternate_shallow_file = NULL;
diff --git a/shallow.c b/shallow.c
index 7fd04afed1..fac383dec9 100644
--- a/shallow.c
+++ b/shallow.c
@@ -40,13 +40,6 @@ int register_shallow(struct repository *r, const struct object_id *oid)

 int is_repository_shallow(struct repository *r)
 {
-	/*
-	 * NEEDSWORK: This function updates
-	 * r->parsed_objects->{is_shallow,shallow_stat} as a side effect but
-	 * there is no corresponding function to clear them when the shallow
-	 * file is updated.
-	 */
-
 	FILE *fp;
 	char buf[1024];
 	const char *path = r->parsed_objects->alternate_shallow_file;
@@ -79,6 +72,12 @@ int is_repository_shallow(struct repository *r)
 	return r->parsed_objects->is_shallow;
 }

+void reset_repository_shallow(struct repository *r)
+{
+	r->parsed_objects->is_shallow = -1;
+	stat_validity_clear(r->parsed_objects->shallow_stat);
+}
+
 /*
  * TODO: use "int" elemtype instead of "int *" when/if commit-slab
  * supports a "valid" flag.
@@ -362,6 +361,7 @@ void setup_alternate_shallow(struct lock_file *shallow_lock,
 		 * shallow file".
 		 */
 		*alternate_shallow_file = "";
+	reset_repository_shallow(the_repository);
 	strbuf_release(&sb);
 }

@@ -411,6 +411,7 @@ void prune_shallow(unsigned options)
 			die_errno("failed to write to %s",
 				  get_lock_file_path(&shallow_lock));
 		commit_lock_file(&shallow_lock);
+		reset_repository_shallow(the_repository);
 	} else {
 		unlink(git_path_shallow(the_repository));
 		rollback_lock_file(&shallow_lock);
--
2.26.0.106.g9fadedd637
Taylor Blau April 15, 2020, 4:31 a.m. UTC | #2
On Tue, Apr 14, 2020 at 10:29:30PM -0600, Taylor Blau wrote:
> Whoops. I sent the wrong version of this patch. It should be the below:

Double whoops. I was on the wrong branch, and hit send too early. *This*
is the version of the patch that I meant to send ;).

--- >8 ---

Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'

When operating on a stream of commit OIDs on stdin, 'git commit-graph
write' checks that each OID refers to an object that is indeed a commit.
This is convenient to make sure that the given input is well-formed, but
can sometimes be undesirable.

For example, server operators may wish to feed the refnames that were
updated during a push to 'git commit-graph write --input=stdin-commits',
and silently discard refs that don't point at commits. This can be done
by combing the output of 'git for-each-ref' with '--format
%(*objecttype)', but this requires opening up a potentially large number
of objects.  Instead, it is more convenient to feed the updated refs to
the commit-graph machinery, and let it throw out refs that don't point
to commits.

Introduce '--[no-]check-oids' to make such a behavior possible. With
'--check-oids' (the default behavior to retain backwards compatibility),
'git commit-graph write' will barf on a non-commit line in its input.
With 'no-check-oids', such lines will be silently ignored, making the
above possible by specifying this option.

No matter which is supplied, 'git commit-graph write' retains the
behavior from the previous commit of rejecting non-OID inputs like
"HEAD" and "refs/heads/foo" as before.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-commit-graph.txt |  5 +++++
 builtin/commit-graph.c             | 11 ++++++++---
 commit-graph.c                     |  2 +-
 t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 46f7f7c573..91e8027b86 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -82,6 +82,11 @@ tip with the previous tip.
 Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
 be the current time. After writing the split commit-graph, delete all
 unused commit-graph whose modified times are older than `datetime`.
++
+The `--[no-]check-oids` option decides whether or not OIDs are required
+to be commits. By default, `--check-oids` is implied, generating an
+error on non-commit objects. If `--no-check-oids` is given, non-commits
+are silently discarded.

 'verify'::

diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index c69716aa7e..2d0a8e822a 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = {
 	N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };

@@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = {
 static const char * const builtin_commit_graph_write_usage[] = {
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };

@@ -36,6 +36,7 @@ static struct opts_commit_graph {
 	int split;
 	int shallow;
 	int progress;
+	int check_oids;
 } opts;

 static struct object_directory *find_odb(struct repository *r,
@@ -163,6 +164,8 @@ static int graph_write(int argc, const char **argv)
 			N_("allow writing an incremental commit-graph file"),
 			PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
 			write_option_parse_split),
+		OPT_BOOL(0, "check-oids", &opts.check_oids,
+			N_("require OIDs to be commits")),
 		OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
 			N_("maximum number of commits in a non-base split commit-graph")),
 		OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
@@ -173,6 +176,7 @@ static int graph_write(int argc, const char **argv)
 	};

 	opts.progress = isatty(2);
+	opts.check_oids = 1;
 	split_opts.size_multiple = 2;
 	split_opts.max_commits = 0;
 	split_opts.expire_time = 0;
@@ -227,7 +231,8 @@ static int graph_write(int argc, const char **argv)

 				oidset_insert(&commits, &oid);
 			}
-			flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
+			if (opts.check_oids)
+				flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
 		}

 		UNLEAK(buf);
diff --git a/commit-graph.c b/commit-graph.c
index f60346baee..b8737f0ce9 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g)
 	 *
 	 * There should only be very basic checks here to ensure that
 	 * we don't e.g. segfault in fill_commit_in_graph(), but
-	 * because this is a very hot codepath nothing that e.g. loops
+	 e because this is a very hot codepath nothing that e.g. loops
 	 * over g->num_commits, or runs a checksum on the commit-graph
 	 * itself.
 	 */
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e874a12696..7960cefa1b 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -49,6 +49,34 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' '
 	test_i18ngrep "invalid commit object id" stderr
 '

+graph_expect_commits() {
+	test-tool read-graph >got
+	if ! grep "num_commits: $1" got
+	then
+		echo "graph_expect_commits: expected $1 commit(s), got:"
+		cat got
+		false
+	fi
+}
+
+test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' '
+	test_when_finished rm -rf "$objdir/info/commit-graph" &&
+	cd "$TRASH_DIRECTORY/full" &&
+	# write a graph to ensure layers are/are not added appropriately
+	git rev-parse HEAD~1 >base &&
+	git commit-graph write --stdin-commits <base &&
+	graph_expect_commits 2 &&
+	# bad input is rejected
+	echo HEAD >bad &&
+	test_expect_code 1 git commit-graph write --stdin-commits <bad 2>err &&
+	test_i18ngrep "unexpected non-hex object ID: HEAD" err &&
+	graph_expect_commits 2 &&
+	# update with valid commit OID, ignore tree OID
+	git rev-parse HEAD HEAD^{tree} >in &&
+	git commit-graph write --stdin-commits --no-check-oids <in &&
+	graph_expect_commits 3
+'
+
 graph_git_two_modes() {
 	git -c core.commitGraph=true $1 >output
 	git -c core.commitGraph=false $1 >expect
--
2.26.0.106.g9fadedd637
SZEDER Gábor April 22, 2020, 10:55 a.m. UTC | #3
On Tue, Apr 14, 2020 at 10:31:37PM -0600, Taylor Blau wrote:
> On Tue, Apr 14, 2020 at 10:29:30PM -0600, Taylor Blau wrote:
> > Whoops. I sent the wrong version of this patch. It should be the below:
> 
> Double whoops. I was on the wrong branch, and hit send too early. *This*
> is the version of the patch that I meant to send ;).
> 
> --- >8 ---
> 
> Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'
> 
> When operating on a stream of commit OIDs on stdin, 'git commit-graph
> write' checks that each OID refers to an object that is indeed a commit.
> This is convenient to make sure that the given input is well-formed, but
> can sometimes be undesirable.
> 
> For example, server operators may wish to feed the refnames that were

s/the refnames/full commit object IDs pointed to by refs/

or something similar.

> updated during a push to 'git commit-graph write --input=stdin-commits',
> and silently discard refs that don't point at commits.

s/refs/<something along the lines of the above>/

> This can be done
> by combing the output of 'git for-each-ref' with '--format
> %(*objecttype)', but this requires opening up a potentially large number
> of objects.  Instead, it is more convenient to feed the updated refs to

s/refs/.../

> the commit-graph machinery, and let it throw out refs that don't point

s/refs/.../

> to commits.
> 
> Introduce '--[no-]check-oids' to make such a behavior possible. With
> '--check-oids' (the default behavior to retain backwards compatibility),
> 'git commit-graph write' will barf on a non-commit line in its input.
> With 'no-check-oids', such lines will be silently ignored, making the

s/no-check-oids/--no-check-oids/

> above possible by specifying this option.
> 
> No matter which is supplied, 'git commit-graph write' retains the
> behavior from the previous commit of rejecting non-OID inputs like
> "HEAD" and "refs/heads/foo" as before.

See? :)  This is why all those s/// are necessary.

> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  Documentation/git-commit-graph.txt |  5 +++++
>  builtin/commit-graph.c             | 11 ++++++++---
>  commit-graph.c                     |  2 +-
>  t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
>  4 files changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> index 46f7f7c573..91e8027b86 100644
> --- a/Documentation/git-commit-graph.txt
> +++ b/Documentation/git-commit-graph.txt
> @@ -82,6 +82,11 @@ tip with the previous tip.
>  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
>  be the current time. After writing the split commit-graph, delete all
>  unused commit-graph whose modified times are older than `datetime`.
> ++
> +The `--[no-]check-oids` option decides whether or not OIDs are required
> +to be commits. By default, `--check-oids` is implied, generating an
> +error on non-commit objects. If `--no-check-oids` is given, non-commits
> +are silently discarded.

What happens with OIDs of tags, in particular with OIDs of tags that
can be peeled down to commit objects?  According to (my (too
pedantic?) interpretation of) the above description they will trigger
an error with '--check-oids' or will be ignored with
'--no-check-oids'.  The implementation, however, accepts those oids
and peels them down to commit objects; I think this is the right
behaviour.

What happens with OIDs that name non-existing objects?

>  'verify'::
> 
> diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
> index c69716aa7e..2d0a8e822a 100644
> --- a/builtin/commit-graph.c
> +++ b/builtin/commit-graph.c
> @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = {
>  	N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
>  	N_("git commit-graph write [--object-dir <objdir>] [--append] "
>  	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
> -	   "[--[no-]progress] <split options>"),
> +	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
>  	NULL
>  };
> 
> @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = {
>  static const char * const builtin_commit_graph_write_usage[] = {
>  	N_("git commit-graph write [--object-dir <objdir>] [--append] "
>  	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
> -	   "[--[no-]progress] <split options>"),
> +	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
>  	NULL
>  };
> 
> @@ -36,6 +36,7 @@ static struct opts_commit_graph {
>  	int split;
>  	int shallow;
>  	int progress;
> +	int check_oids;
>  } opts;
> 
>  static struct object_directory *find_odb(struct repository *r,
> @@ -163,6 +164,8 @@ static int graph_write(int argc, const char **argv)
>  			N_("allow writing an incremental commit-graph file"),
>  			PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
>  			write_option_parse_split),
> +		OPT_BOOL(0, "check-oids", &opts.check_oids,
> +			N_("require OIDs to be commits")),
>  		OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
>  			N_("maximum number of commits in a non-base split commit-graph")),
>  		OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
> @@ -173,6 +176,7 @@ static int graph_write(int argc, const char **argv)
>  	};
> 
>  	opts.progress = isatty(2);
> +	opts.check_oids = 1;
>  	split_opts.size_multiple = 2;
>  	split_opts.max_commits = 0;
>  	split_opts.expire_time = 0;
> @@ -227,7 +231,8 @@ static int graph_write(int argc, const char **argv)
> 
>  				oidset_insert(&commits, &oid);
>  			}
> -			flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
> +			if (opts.check_oids)
> +				flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
>  		}
> 
>  		UNLEAK(buf);
> diff --git a/commit-graph.c b/commit-graph.c
> index f60346baee..b8737f0ce9 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g)
>  	 *
>  	 * There should only be very basic checks here to ensure that
>  	 * we don't e.g. segfault in fill_commit_in_graph(), but
> -	 * because this is a very hot codepath nothing that e.g. loops
> +	 e because this is a very hot codepath nothing that e.g. loops

Bogus hunk, perhaps?

>  	 * over g->num_commits, or runs a checksum on the commit-graph
>  	 * itself.
>  	 */
> diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
> index e874a12696..7960cefa1b 100755
> --- a/t/t5318-commit-graph.sh
> +++ b/t/t5318-commit-graph.sh
> @@ -49,6 +49,34 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' '
>  	test_i18ngrep "invalid commit object id" stderr
>  '
> 
> +graph_expect_commits() {
> +	test-tool read-graph >got
> +	if ! grep "num_commits: $1" got
> +	then
> +		echo "graph_expect_commits: expected $1 commit(s), got:"
> +		cat got
> +		false
> +	fi
> +}
> +
> +test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' '
> +	test_when_finished rm -rf "$objdir/info/commit-graph" &&
> +	cd "$TRASH_DIRECTORY/full" &&
> +	# write a graph to ensure layers are/are not added appropriately
> +	git rev-parse HEAD~1 >base &&
> +	git commit-graph write --stdin-commits <base &&
> +	graph_expect_commits 2 &&
> +	# bad input is rejected
> +	echo HEAD >bad &&
> +	test_expect_code 1 git commit-graph write --stdin-commits <bad 2>err &&
> +	test_i18ngrep "unexpected non-hex object ID: HEAD" err &&
> +	graph_expect_commits 2 &&
> +	# update with valid commit OID, ignore tree OID
> +	git rev-parse HEAD HEAD^{tree} >in &&
> +	git commit-graph write --stdin-commits --no-check-oids <in &&
> +	graph_expect_commits 3
> +'
> +
>  graph_git_two_modes() {
>  	git -c core.commitGraph=true $1 >output
>  	git -c core.commitGraph=false $1 >expect
> --
> 2.26.0.106.g9fadedd637
>
Taylor Blau April 22, 2020, 11:39 p.m. UTC | #4
On Wed, Apr 22, 2020 at 12:55:36PM +0200, SZEDER Gábor wrote:
> On Tue, Apr 14, 2020 at 10:31:37PM -0600, Taylor Blau wrote:
> > On Tue, Apr 14, 2020 at 10:29:30PM -0600, Taylor Blau wrote:
> > > Whoops. I sent the wrong version of this patch. It should be the below:
> >
> > Double whoops. I was on the wrong branch, and hit send too early. *This*
> > is the version of the patch that I meant to send ;).
> >
> > --- >8 ---
> >
> > Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'
> >
> > When operating on a stream of commit OIDs on stdin, 'git commit-graph
> > write' checks that each OID refers to an object that is indeed a commit.
> > This is convenient to make sure that the given input is well-formed, but
> > can sometimes be undesirable.
> >
> > For example, server operators may wish to feed the refnames that were
>
> s/the refnames/full commit object IDs pointed to by refs/
>
> or something similar.
>
> > updated during a push to 'git commit-graph write --input=stdin-commits',
> > and silently discard refs that don't point at commits.
>
> s/refs/<something along the lines of the above>/
>
> > This can be done
> > by combing the output of 'git for-each-ref' with '--format
> > %(*objecttype)', but this requires opening up a potentially large number
> > of objects.  Instead, it is more convenient to feed the updated refs to
>
> s/refs/.../
>
> > the commit-graph machinery, and let it throw out refs that don't point
>
> s/refs/.../
>
> > to commits.
> >
> > Introduce '--[no-]check-oids' to make such a behavior possible. With
> > '--check-oids' (the default behavior to retain backwards compatibility),
> > 'git commit-graph write' will barf on a non-commit line in its input.
> > With 'no-check-oids', such lines will be silently ignored, making the
>
> s/no-check-oids/--no-check-oids/
>
> > above possible by specifying this option.
> >
> > No matter which is supplied, 'git commit-graph write' retains the
> > behavior from the previous commit of rejecting non-OID inputs like
> > "HEAD" and "refs/heads/foo" as before.
>
> See? :)  This is why all those s/// are necessary.

Ah :). All good points; thanks for your suggestions.

> > Signed-off-by: Taylor Blau <me@ttaylorr.com>
> > ---
> >  Documentation/git-commit-graph.txt |  5 +++++
> >  builtin/commit-graph.c             | 11 ++++++++---
> >  commit-graph.c                     |  2 +-
> >  t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
> >  4 files changed, 42 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> > index 46f7f7c573..91e8027b86 100644
> > --- a/Documentation/git-commit-graph.txt
> > +++ b/Documentation/git-commit-graph.txt
> > @@ -82,6 +82,11 @@ tip with the previous tip.
> >  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
> >  be the current time. After writing the split commit-graph, delete all
> >  unused commit-graph whose modified times are older than `datetime`.
> > ++
> > +The `--[no-]check-oids` option decides whether or not OIDs are required
> > +to be commits. By default, `--check-oids` is implied, generating an
> > +error on non-commit objects. If `--no-check-oids` is given, non-commits
> > +are silently discarded.
>
> What happens with OIDs of tags, in particular with OIDs of tags that
> can be peeled down to commit objects?  According to (my (too
> pedantic?) interpretation of) the above description they will trigger
> an error with '--check-oids' or will be ignored with
> '--no-check-oids'.  The implementation, however, accepts those oids
> and peels them down to commit objects; I think this is the right
> behaviour.

That's right, and certainly merits a mention in the documentation. I've
added that...

> What happens with OIDs that name non-existing objects?

...these are silently discarded. I think that you could make a
compelling argument in either direction on this one, but I'm slightly
swayed towards "discard these, too", since '--no-check-oids' is
literally saying "don't check these".

I guess that pushes us into the territory of whether or not "check" is
the right verb. "verify"? "scrutinize" :)? Do you have any thoughts
here?

If you're otherwise satisfied with this series, here's the updated
patch.

-- >8 --

Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'

When operating on a stream of commit OIDs on stdin, 'git commit-graph
write' checks that each OID refers to an object that is indeed a commit.
This is convenient to make sure that the given input is well-formed, but
can sometimes be undesirable.

For example, server operators may wish to feed the full commit object
IDs pointed to by refs that were updated during a push to 'git
commit-graph write --input=stdin-commits', and silently discard any
input that doesn't point at a commit. This can be done by combing the
output of 'git for-each-ref' with '--format %(*objecttype)', but this
requires opening up a potentially large number of objects.  Instead, it
is more convenient to feed the updated object IDs to the commit-graph
machinery, and let it throw out whatever remains.  to commits.

Introduce '--[no-]check-oids' to make such a behavior possible. With
'--check-oids' (the default behavior to retain backwards compatibility),
'git commit-graph write' will barf on a non-commit line in its input.
With '--no-check-oids', such lines will be silently ignored, making the
above possible by specifying this option.

No matter which is supplied, 'git commit-graph write' retains the
behavior from the previous commit of rejecting non-OID inputs like
"HEAD" and "refs/heads/foo" as before.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-commit-graph.txt |  6 ++++++
 builtin/commit-graph.c             | 11 ++++++++---
 t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 46f7f7c573..6bdbe42766 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -82,6 +82,12 @@ tip with the previous tip.
 Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
 be the current time. After writing the split commit-graph, delete all
 unused commit-graph whose modified times are older than `datetime`.
++
+The `--[no-]check-oids` option decides whether or not OIDs are required
+to be commits. By default, `--check-oids` is implied, generating an
+error on non-commit objects. If `--no-check-oids` is given, non-commits
+and non-existent objects are silently discarded. In either case, tags
+are peeled down to the object they reference.

 'verify'::

diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index c69716aa7e..2d0a8e822a 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = {
 	N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };

@@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = {
 static const char * const builtin_commit_graph_write_usage[] = {
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };

@@ -36,6 +36,7 @@ static struct opts_commit_graph {
 	int split;
 	int shallow;
 	int progress;
+	int check_oids;
 } opts;

 static struct object_directory *find_odb(struct repository *r,
@@ -163,6 +164,8 @@ static int graph_write(int argc, const char **argv)
 			N_("allow writing an incremental commit-graph file"),
 			PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
 			write_option_parse_split),
+		OPT_BOOL(0, "check-oids", &opts.check_oids,
+			N_("require OIDs to be commits")),
 		OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
 			N_("maximum number of commits in a non-base split commit-graph")),
 		OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
@@ -173,6 +176,7 @@ static int graph_write(int argc, const char **argv)
 	};

 	opts.progress = isatty(2);
+	opts.check_oids = 1;
 	split_opts.size_multiple = 2;
 	split_opts.max_commits = 0;
 	split_opts.expire_time = 0;
@@ -227,7 +231,8 @@ static int graph_write(int argc, const char **argv)

 				oidset_insert(&commits, &oid);
 			}
-			flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
+			if (opts.check_oids)
+				flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
 		}

 		UNLEAK(buf);
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e874a12696..7960cefa1b 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -49,6 +49,34 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' '
 	test_i18ngrep "invalid commit object id" stderr
 '

+graph_expect_commits() {
+	test-tool read-graph >got
+	if ! grep "num_commits: $1" got
+	then
+		echo "graph_expect_commits: expected $1 commit(s), got:"
+		cat got
+		false
+	fi
+}
+
+test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' '
+	test_when_finished rm -rf "$objdir/info/commit-graph" &&
+	cd "$TRASH_DIRECTORY/full" &&
+	# write a graph to ensure layers are/are not added appropriately
+	git rev-parse HEAD~1 >base &&
+	git commit-graph write --stdin-commits <base &&
+	graph_expect_commits 2 &&
+	# bad input is rejected
+	echo HEAD >bad &&
+	test_expect_code 1 git commit-graph write --stdin-commits <bad 2>err &&
+	test_i18ngrep "unexpected non-hex object ID: HEAD" err &&
+	graph_expect_commits 2 &&
+	# update with valid commit OID, ignore tree OID
+	git rev-parse HEAD HEAD^{tree} >in &&
+	git commit-graph write --stdin-commits --no-check-oids <in &&
+	graph_expect_commits 3
+'
+
 graph_git_two_modes() {
 	git -c core.commitGraph=true $1 >output
 	git -c core.commitGraph=false $1 >expect
--
2.26.0.113.ge9739cdccc
SZEDER Gábor April 24, 2020, 10:59 a.m. UTC | #5
On Wed, Apr 22, 2020 at 05:39:30PM -0600, Taylor Blau wrote:
> > > diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> > > index 46f7f7c573..91e8027b86 100644
> > > --- a/Documentation/git-commit-graph.txt
> > > +++ b/Documentation/git-commit-graph.txt
> > > @@ -82,6 +82,11 @@ tip with the previous tip.
> > >  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
> > >  be the current time. After writing the split commit-graph, delete all
> > >  unused commit-graph whose modified times are older than `datetime`.
> > > ++
> > > +The `--[no-]check-oids` option decides whether or not OIDs are required
> > > +to be commits. By default, `--check-oids` is implied, generating an
> > > +error on non-commit objects. If `--no-check-oids` is given, non-commits
> > > +are silently discarded.
> >
> > What happens with OIDs of tags, in particular with OIDs of tags that
> > can be peeled down to commit objects?  According to (my (too
> > pedantic?) interpretation of) the above description they will trigger
> > an error with '--check-oids' or will be ignored with
> > '--no-check-oids'.  The implementation, however, accepts those oids
> > and peels them down to commit objects; I think this is the right
> > behaviour.
> 
> That's right, and certainly merits a mention in the documentation. I've
> added that...
> 
> > What happens with OIDs that name non-existing objects?
> 
> ...these are silently discarded. I think that you could make a
> compelling argument in either direction on this one, but I'm slightly
> swayed towards "discard these, too", since '--no-check-oids' is
> literally saying "don't check these".

I don't want to argue either way, but I'd argue for making a conscious
decision that is justified in the commit message and documented in the
docs.

So, the option is '--stdin-commits' or '--input=stdin-commits', but
it's not only about commits.  Now, allowing OIDs of tags pointing to
commits and peeling them makes obvious sense, because we want commits
reachable from those tags included in the commit-graph file.  Allowing
OIDs of tags pointing to non-commits and silently ignoring them makes
sense (to better support your 'git f-e-r ... | git c-g write ...' use
case), though it's not that obvious (after all I managed to overlook
it back then, that's why we are now here discussing these
'--check-oids' patches).

But I'm not sure about silently ignoring OIDs pointing to non-existent
objects, because those might be a sign of some issues in whatever is
generating the list of objects to be fed to 'git c-g write'.  E.g. there
could be a race between 'git for-each-ref' listing OIDs and some other
processes pruning them.  Does this worth worrying about?  Dunno...
but at least let's think it through, and record in the commit message
why we made that decision, whatever that decision might be.

> I guess that pushes us into the territory of whether or not "check" is
> the right verb. "verify"? 

Oh, I didn't think about this, but now that you mention it we have
'--verify' in 'git rev-parse', 'git tag' and elsewhere, and we have
'verify-commit', 'verify-path' and 'verify-tag' commands.  So
'--verify-oids' might be more consistent.  I kind of like the 'oids'
suffix in the option name, though I don't know what else we might want
to verify in this command in the future...

> "scrutinize" :)?

Huhh, erm, no ;)

> If you're otherwise satisfied with this series, here's the updated
> patch.

I haven't yet looked closely at the rest of the series...  The
documentation update in the updated patch below looks good to me,
thanks.

> -- >8 --
> 
> Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'
> 
> When operating on a stream of commit OIDs on stdin, 'git commit-graph
> write' checks that each OID refers to an object that is indeed a commit.
> This is convenient to make sure that the given input is well-formed, but
> can sometimes be undesirable.
> 
> For example, server operators may wish to feed the full commit object
> IDs pointed to by refs that were updated during a push to 'git
> commit-graph write --input=stdin-commits', and silently discard any
> input that doesn't point at a commit. This can be done by combing the
> output of 'git for-each-ref' with '--format %(*objecttype)', but this
> requires opening up a potentially large number of objects.  Instead, it
> is more convenient to feed the updated object IDs to the commit-graph
> machinery, and let it throw out whatever remains.  to commits.

Either the bulk of a sentence is missing, or there is a stray(?) "to
commits." at the end of this line.

> Introduce '--[no-]check-oids' to make such a behavior possible. With
> '--check-oids' (the default behavior to retain backwards compatibility),
> 'git commit-graph write' will barf on a non-commit line in its input.
> With '--no-check-oids', such lines will be silently ignored, making the
> above possible by specifying this option.
> 
> No matter which is supplied, 'git commit-graph write' retains the
> behavior from the previous commit of rejecting non-OID inputs like
> "HEAD" and "refs/heads/foo" as before.
> 
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  Documentation/git-commit-graph.txt |  6 ++++++
>  builtin/commit-graph.c             | 11 ++++++++---
>  t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
>  3 files changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> index 46f7f7c573..6bdbe42766 100644
> --- a/Documentation/git-commit-graph.txt
> +++ b/Documentation/git-commit-graph.txt
> @@ -82,6 +82,12 @@ tip with the previous tip.
>  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
>  be the current time. After writing the split commit-graph, delete all
>  unused commit-graph whose modified times are older than `datetime`.
> ++
> +The `--[no-]check-oids` option decides whether or not OIDs are required
> +to be commits. By default, `--check-oids` is implied, generating an
> +error on non-commit objects. If `--no-check-oids` is given, non-commits
> +and non-existent objects are silently discarded. In either case, tags
> +are peeled down to the object they reference.
Taylor Blau May 1, 2020, 10:38 p.m. UTC | #6
On Fri, Apr 24, 2020 at 12:59:57PM +0200, SZEDER Gábor wrote:
> On Wed, Apr 22, 2020 at 05:39:30PM -0600, Taylor Blau wrote:
> > > > diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> > > > index 46f7f7c573..91e8027b86 100644
> > > > --- a/Documentation/git-commit-graph.txt
> > > > +++ b/Documentation/git-commit-graph.txt
> > > > @@ -82,6 +82,11 @@ tip with the previous tip.
> > > >  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
> > > >  be the current time. After writing the split commit-graph, delete all
> > > >  unused commit-graph whose modified times are older than `datetime`.
> > > > ++
> > > > +The `--[no-]check-oids` option decides whether or not OIDs are required
> > > > +to be commits. By default, `--check-oids` is implied, generating an
> > > > +error on non-commit objects. If `--no-check-oids` is given, non-commits
> > > > +are silently discarded.
> > >
> > > What happens with OIDs of tags, in particular with OIDs of tags that
> > > can be peeled down to commit objects?  According to (my (too
> > > pedantic?) interpretation of) the above description they will trigger
> > > an error with '--check-oids' or will be ignored with
> > > '--no-check-oids'.  The implementation, however, accepts those oids
> > > and peels them down to commit objects; I think this is the right
> > > behaviour.
> >
> > That's right, and certainly merits a mention in the documentation. I've
> > added that...
> >
> > > What happens with OIDs that name non-existing objects?
> >
> > ...these are silently discarded. I think that you could make a
> > compelling argument in either direction on this one, but I'm slightly
> > swayed towards "discard these, too", since '--no-check-oids' is
> > literally saying "don't check these".
>
> I don't want to argue either way, but I'd argue for making a conscious
> decision that is justified in the commit message and documented in the
> docs.

Me either, I very much welcome your consistently thoughtful replies
(even if my extreme delay in responding to this one would suggest
otherwise... ;)).

> So, the option is '--stdin-commits' or '--input=stdin-commits', but
> it's not only about commits.  Now, allowing OIDs of tags pointing to
> commits and peeling them makes obvious sense, because we want commits
> reachable from those tags included in the commit-graph file.  Allowing
> OIDs of tags pointing to non-commits and silently ignoring them makes
> sense (to better support your 'git f-e-r ... | git c-g write ...' use
> case), though it's not that obvious (after all I managed to overlook
> it back then, that's why we are now here discussing these
> '--check-oids' patches).
>
> But I'm not sure about silently ignoring OIDs pointing to non-existent
> objects, because those might be a sign of some issues in whatever is
> generating the list of objects to be fed to 'git c-g write'.  E.g. there
> could be a race between 'git for-each-ref' listing OIDs and some other
> processes pruning them.  Does this worth worrying about?  Dunno...
> but at least let's think it through, and record in the commit message
> why we made that decision, whatever that decision might be.

Yeah, I think that the most reasonable behavior is definitely that we
should complain about non-existent objects over 'git commit-graph write
--stdin-commits' no matter if '--[no-]check-oids' is given or not.

But, let's step back for a minute. What are we actually hoping to
accomplish with '--check-oids'? I wrote this patch because I wanted a
way to support 'git for-each-ref' piping into 'git commit-graph write'
without having to juggle which tags peel down to commits and which
don't.

Now, I figured that it would be unreasonable to change the default
behavior of 'git commit-graph write --stdin-commits' (which is to
complain and error out in this circumstance), so I added
'--no-check-oids' as a way to avoid that behavior for callers that want
that.

But, are there ever any callers that *wouldn't* want this behavior? As
far as I can tell, probably not. We're only going to be permitting
*more* inputs to 'git commit-graph write', and I seriously doubt that
anybody is depending on the above behavior. (Of course, if that's not
the case, I'd love for somebody to speak up here and we can continue
the course on this patch).

So, I propose the following:

  * We drop the idea of '--[no-]{check,verify}-oids', and always
    silently ignore non-commit inputs, retaining the existing behavior
    of always complaining about things that aren't valid hex OIDs, such
    as "HEAD".

  * We always error out on missing or corrupt commit OIDs, including
    valid OIDs that don't resolve to any object, or resolve to a tag
    that can't be fully peeled.

Does that seem reasonable?

> > I guess that pushes us into the territory of whether or not "check" is
> > the right verb. "verify"?
>
> Oh, I didn't think about this, but now that you mention it we have
> '--verify' in 'git rev-parse', 'git tag' and elsewhere, and we have
> 'verify-commit', 'verify-path' and 'verify-tag' commands.  So
> '--verify-oids' might be more consistent.  I kind of like the 'oids'
> suffix in the option name, though I don't know what else we might want
> to verify in this command in the future...
>
> > "scrutinize" :)?
>
> Huhh, erm, no ;)
>
> > If you're otherwise satisfied with this series, here's the updated
> > patch.
>
> I haven't yet looked closely at the rest of the series...  The
> documentation update in the updated patch below looks good to me,
> thanks.
>
> > -- >8 --
> >
> > Subject: [PATCH] commit-graph.c: introduce '--[no-]check-oids'
> >
> > When operating on a stream of commit OIDs on stdin, 'git commit-graph
> > write' checks that each OID refers to an object that is indeed a commit.
> > This is convenient to make sure that the given input is well-formed, but
> > can sometimes be undesirable.
> >
> > For example, server operators may wish to feed the full commit object
> > IDs pointed to by refs that were updated during a push to 'git
> > commit-graph write --input=stdin-commits', and silently discard any
> > input that doesn't point at a commit. This can be done by combing the
> > output of 'git for-each-ref' with '--format %(*objecttype)', but this
> > requires opening up a potentially large number of objects.  Instead, it
> > is more convenient to feed the updated object IDs to the commit-graph
> > machinery, and let it throw out whatever remains.  to commits.
>
> Either the bulk of a sentence is missing, or there is a stray(?) "to
> commits." at the end of this line.
>
> > Introduce '--[no-]check-oids' to make such a behavior possible. With
> > '--check-oids' (the default behavior to retain backwards compatibility),
> > 'git commit-graph write' will barf on a non-commit line in its input.
> > With '--no-check-oids', such lines will be silently ignored, making the
> > above possible by specifying this option.
> >
> > No matter which is supplied, 'git commit-graph write' retains the
> > behavior from the previous commit of rejecting non-OID inputs like
> > "HEAD" and "refs/heads/foo" as before.
> >
> > Signed-off-by: Taylor Blau <me@ttaylorr.com>
> > ---
> >  Documentation/git-commit-graph.txt |  6 ++++++
> >  builtin/commit-graph.c             | 11 ++++++++---
> >  t/t5318-commit-graph.sh            | 28 ++++++++++++++++++++++++++++
> >  3 files changed, 42 insertions(+), 3 deletions(-)
> >
> > diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
> > index 46f7f7c573..6bdbe42766 100644
> > --- a/Documentation/git-commit-graph.txt
> > +++ b/Documentation/git-commit-graph.txt
> > @@ -82,6 +82,12 @@ tip with the previous tip.
> >  Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
> >  be the current time. After writing the split commit-graph, delete all
> >  unused commit-graph whose modified times are older than `datetime`.
> > ++
> > +The `--[no-]check-oids` option decides whether or not OIDs are required
> > +to be commits. By default, `--check-oids` is implied, generating an
> > +error on non-commit objects. If `--no-check-oids` is given, non-commits
> > +and non-existent objects are silently discarded. In either case, tags
> > +are peeled down to the object they reference.

Thanks,
Taylor
Jeff King May 3, 2020, 9:40 a.m. UTC | #7
On Fri, May 01, 2020 at 04:38:48PM -0600, Taylor Blau wrote:

> But, are there ever any callers that *wouldn't* want this behavior? As
> far as I can tell, probably not. We're only going to be permitting
> *more* inputs to 'git commit-graph write', and I seriously doubt that
> anybody is depending on the above behavior. (Of course, if that's not
> the case, I'd love for somebody to speak up here and we can continue
> the course on this patch).
> 
> So, I propose the following:
> 
>   * We drop the idea of '--[no-]{check,verify}-oids', and always
>     silently ignore non-commit inputs, retaining the existing behavior
>     of always complaining about things that aren't valid hex OIDs, such
>     as "HEAD".
> 
>   * We always error out on missing or corrupt commit OIDs, including
>     valid OIDs that don't resolve to any object, or resolve to a tag
>     that can't be fully peeled.
> 
> Does that seem reasonable?

FWIW, I think that is the best direction. If anybody is depending on the
"commit-graph write will complain about non-commits" behavior, they
could only be doing so for a few versions; prior to v2.24.0 we did not.

-Peff
Junio C Hamano May 3, 2020, 4:55 p.m. UTC | #8
Jeff King <peff@peff.net> writes:

> On Fri, May 01, 2020 at 04:38:48PM -0600, Taylor Blau wrote:
> ...
>> So, I propose the following:
>> 
>>   * We drop the idea of '--[no-]{check,verify}-oids', and always
>>     silently ignore non-commit inputs, retaining the existing behavior
>>     of always complaining about things that aren't valid hex OIDs, such
>>     as "HEAD".
>> 
>>   * We always error out on missing or corrupt commit OIDs, including
>>     valid OIDs that don't resolve to any object, or resolve to a tag
>>     that can't be fully peeled.
>> 
>> Does that seem reasonable?
>
> FWIW, I think that is the best direction. If anybody is depending on the
> "commit-graph write will complain about non-commits" behavior, they
> could only be doing so for a few versions; prior to v2.24.0 we did not.

If we had it for the past 180 days or so, that's not like " people
have seen it for only a brief time", but working it around shouldn't
be too difficult---they need to validate the input they feed to the
command themselves (or do they need to do more?).

Thanks.
Jeff King May 4, 2020, 2:59 p.m. UTC | #9
On Sun, May 03, 2020 at 09:55:39AM -0700, Junio C Hamano wrote:

> >> Does that seem reasonable?
> >
> > FWIW, I think that is the best direction. If anybody is depending on the
> > "commit-graph write will complain about non-commits" behavior, they
> > could only be doing so for a few versions; prior to v2.24.0 we did not.
> 
> If we had it for the past 180 days or so, that's not like " people
> have seen it for only a brief time", but working it around shouldn't
> be too difficult---they need to validate the input they feed to the
> command themselves (or do they need to do more?).

Yeah, my point wasn't so much that it was brief as that we've had it
both ways, and nobody was complaining about it before v2.24.0 (the
type-restriction change came as a side effect of another tightening).

But yeah, if somebody really wants that validation, they can do it
themselves with "cat-file --batch-check". Or even for-each-ref directly:

  git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
  awk '/commit/ { print $1 }' |
  git commit-graph write --stdin-commits

If you're using --stdin-commits, you're presumably processing the input
anyway (since otherwise you'd just be using --reachable).

I suppose you could argue the other way, too (that the user could be
filtering out non-commits). But so far we have one data point in either
direction, and it wants the more forgiving behavior. :)

-Peff
Junio C Hamano May 4, 2020, 4:29 p.m. UTC | #10
Jeff King <peff@peff.net> writes:

> On Sun, May 03, 2020 at 09:55:39AM -0700, Junio C Hamano wrote:
>
>> >> Does that seem reasonable?
>> >
>> > FWIW, I think that is the best direction. If anybody is depending on the
>> > "commit-graph write will complain about non-commits" behavior, they
>> > could only be doing so for a few versions; prior to v2.24.0 we did not.
>> 
>> If we had it for the past 180 days or so, that's not like " people
>> have seen it for only a brief time", but working it around shouldn't
>> be too difficult---they need to validate the input they feed to the
>> command themselves (or do they need to do more?).
>
> Yeah, my point wasn't so much that it was brief as that we've had it
> both ways, and nobody was complaining about it before v2.24.0 (the
> type-restriction change came as a side effect of another tightening).
>
> But yeah, if somebody really wants that validation, they can do it
> themselves with "cat-file --batch-check". Or even for-each-ref directly:
>
>   git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
>   awk '/commit/ { print $1 }' |
>   git commit-graph write --stdin-commits
>
> If you're using --stdin-commits, you're presumably processing the input
> anyway (since otherwise you'd just be using --reachable).
>
> I suppose you could argue the other way, too (that the user could be
> filtering out non-commits). But so far we have one data point in either
> direction, and it wants the more forgiving behavior. :)

Yup.  I agree that Taylor outlined the best direction going forward.

Thanks.
Taylor Blau May 4, 2020, 10:16 p.m. UTC | #11
On Mon, May 04, 2020 at 09:29:29AM -0700, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
>
> > On Sun, May 03, 2020 at 09:55:39AM -0700, Junio C Hamano wrote:
> >
> >> >> Does that seem reasonable?
> >> >
> >> > FWIW, I think that is the best direction. If anybody is depending on the
> >> > "commit-graph write will complain about non-commits" behavior, they
> >> > could only be doing so for a few versions; prior to v2.24.0 we did not.
> >>
> >> If we had it for the past 180 days or so, that's not like " people
> >> have seen it for only a brief time", but working it around shouldn't
> >> be too difficult---they need to validate the input they feed to the
> >> command themselves (or do they need to do more?).
> >
> > Yeah, my point wasn't so much that it was brief as that we've had it
> > both ways, and nobody was complaining about it before v2.24.0 (the
> > type-restriction change came as a side effect of another tightening).
> >
> > But yeah, if somebody really wants that validation, they can do it
> > themselves with "cat-file --batch-check". Or even for-each-ref directly:
> >
> >   git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
> >   awk '/commit/ { print $1 }' |
> >   git commit-graph write --stdin-commits
> >
> > If you're using --stdin-commits, you're presumably processing the input
> > anyway (since otherwise you'd just be using --reachable).
> >
> > I suppose you could argue the other way, too (that the user could be
> > filtering out non-commits). But so far we have one data point in either
> > direction, and it wants the more forgiving behavior. :)
>
> Yup.  I agree that Taylor outlined the best direction going forward.

Thanks, both. I'll send a series to address this shortly. (I've been
sitting on it in ttaylorr/git for a couple of days over the weekend,
but a few other things today have gotten in the way of me sending it to
the list.)

> Thanks.

Thanks,
Taylor
diff mbox series

Patch

diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 46f7f7c573..91e8027b86 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -82,6 +82,11 @@  tip with the previous tip.
 Finally, if `--expire-time=<datetime>` is not specified, let `datetime`
 be the current time. After writing the split commit-graph, delete all
 unused commit-graph whose modified times are older than `datetime`.
++
+The `--[no-]check-oids` option decides whether or not OIDs are required
+to be commits. By default, `--check-oids` is implied, generating an
+error on non-commit objects. If `--no-check-oids` is given, non-commits
+are silently discarded.
 
 'verify'::
 
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index c69716aa7e..2d0a8e822a 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -11,7 +11,7 @@  static char const * const builtin_commit_graph_usage[] = {
 	N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };
 
@@ -23,7 +23,7 @@  static const char * const builtin_commit_graph_verify_usage[] = {
 static const char * const builtin_commit_graph_write_usage[] = {
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--[no-]progress] <split options>"),
+	   "[--[no-]progress] [--[no-]check-oids] <split options>"),
 	NULL
 };
 
@@ -36,6 +36,7 @@  static struct opts_commit_graph {
 	int split;
 	int shallow;
 	int progress;
+	int check_oids;
 } opts;
 
 static struct object_directory *find_odb(struct repository *r,
@@ -163,6 +164,8 @@  static int graph_write(int argc, const char **argv)
 			N_("allow writing an incremental commit-graph file"),
 			PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
 			write_option_parse_split),
+		OPT_BOOL(0, "check-oids", &opts.check_oids,
+			N_("require OIDs to be commits")),
 		OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
 			N_("maximum number of commits in a non-base split commit-graph")),
 		OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
@@ -173,6 +176,7 @@  static int graph_write(int argc, const char **argv)
 	};
 
 	opts.progress = isatty(2);
+	opts.check_oids = 1;
 	split_opts.size_multiple = 2;
 	split_opts.max_commits = 0;
 	split_opts.expire_time = 0;
@@ -227,7 +231,8 @@  static int graph_write(int argc, const char **argv)
 
 				oidset_insert(&commits, &oid);
 			}
-			flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
+			if (opts.check_oids)
+				flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
 		}
 
 		UNLEAK(buf);
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e874a12696..2cbd301abe 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -49,6 +49,34 @@  test_expect_success 'exit with correct error on bad input to --stdin-commits' '
 	test_i18ngrep "invalid commit object id" stderr
 '
 
+graph_expect_commits() {
+	test-tool read-graph >got
+	if ! grep "num_commits: $1" got
+	then
+		echo "graph_expect_commits: expected $1 commit(s), got:"
+		cat got
+		false
+	fi
+}
+
+test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' '
+	test_when_finished rm -rf "$objdir/info/commit-graph" &&
+	cd "$TRASH_DIRECTORY/full" &&
+	# write a graph to ensure layers are/are not added appropriately
+	git rev-parse HEAD~1 >base &&
+	git commit-graph write --stdin-commits <base &&
+	graph_expect_commits 2 &&
+	# bad input is rejected
+	echo HEAD >bad &&
+	test_expect_code 1 git commit-graph write --stdin-commits <bad 2>err &&
+	test_i18ngrep "invalid commit object id" err &&
+	graph_expect_commits 2 &&
+	# update with valid commit OID, ignore tree OID
+	git rev-parse HEAD HEAD^{tree} >in &&
+	git commit-graph write --stdin-commits --no-check-oids <in &&
+	graph_expect_commits 3
+'
+
 graph_git_two_modes() {
 	git -c core.commitGraph=true $1 >output
 	git -c core.commitGraph=false $1 >expect