diff mbox series

[v2,07/12] bundle: safely handle --objects option

Message ID 19694d5b255227f2314456118c2c7fc986ae52a0.1646689840.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Partial bundles | expand

Commit Message

Derrick Stolee March 7, 2022, 9:50 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

Since 'git bundle' uses setup_revisions() to specify the object walk,
some options do not make sense to include during the pack-objects child
process. Further, these options are used for a call to
traverse_commit_list() which would then require a callback which is
currently NULL.

By populating the callback we prevent a segfault in the case of adding
the --objects flag. This is really a redundant statement because the
command is constructing a pack-file containing all objects in the
discovered commit range.

Adding --objects to a 'git bundle' command might cause a slower command,
but at least it will not have a hard failure when the user supplies this
option. We can also disable walking trees and blobs in advance of this
walk.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 bundle.c               | 10 +++++++++-
 t/t6020-bundle-misc.sh | 12 ++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

Comments

Ævar Arnfjörð Bjarmason March 8, 2022, 9:37 a.m. UTC | #1
On Mon, Mar 07 2022, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <derrickstolee@github.com>
>
> Since 'git bundle' uses setup_revisions() to specify the object walk,
> some options do not make sense to include during the pack-objects child
> process. Further, these options are used for a call to
> traverse_commit_list() which would then require a callback which is
> currently NULL.
>
> By populating the callback we prevent a segfault in the case of adding
> the --objects flag. This is really a redundant statement because the
> command is constructing a pack-file containing all objects in the
> discovered commit range.
>
> Adding --objects to a 'git bundle' command might cause a slower command,
> but at least it will not have a hard failure when the user supplies this
> option. We can also disable walking trees and blobs in advance of this
> walk.
>
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  bundle.c               | 10 +++++++++-
>  t/t6020-bundle-misc.sh | 12 ++++++++++++
>  2 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/bundle.c b/bundle.c
> index a0bb687b0f4..dc56db9a50a 100644
> --- a/bundle.c
> +++ b/bundle.c
> @@ -451,6 +451,12 @@ struct bundle_prerequisites_info {
>  	int fd;
>  };
>  
> +
> +static void ignore_object(struct object *obj, const char *v, void *data)
> +{
> +	/* Do nothing. */
> +}
> +
>  static void write_bundle_prerequisites(struct commit *commit, void *data)
>  {
>  	struct bundle_prerequisites_info *bpi = data;
> @@ -544,7 +550,9 @@ int create_bundle(struct repository *r, const char *path,
>  		die("revision walk setup failed");
>  	bpi.fd = bundle_fd;
>  	bpi.pending = &revs_copy.pending;
> -	traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
> +
> +	revs.blob_objects = revs.tree_objects = 0;
> +	traverse_commit_list(&revs, write_bundle_prerequisites, ignore_object, &bpi);
>  	object_array_remove_duplicates(&revs_copy.pending);
>  
>  	/* write bundle refs */
> diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
> index b13e8a52a93..6522401617d 100755
> --- a/t/t6020-bundle-misc.sh
> +++ b/t/t6020-bundle-misc.sh
> @@ -475,4 +475,16 @@ test_expect_success 'clone from bundle' '
>  	test_cmp expect actual
>  '
>  
> +test_expect_success 'unfiltered bundle with --objects' '
> +	git bundle create all-objects.bdl \
> +		--all --objects &&
> +	git bundle create all.bdl \
> +		--all &&
> +
> +	# Compare the headers of these files.
> +	head -11 all.bdl >expect &&
> +	head -11 all-objects.bdl >actual &&
> +	test_cmp expect actual
> +'
> +
>  test_done

Re this comment on v1: https://lore.kernel.org/git/220307.86fsntzsda.gmgdl@evledraar.gmail.com/

This series also passes your tests with this on top:
	
	diff --git a/bundle.c b/bundle.c
	index 3846108f7a6..1f022f53336 100644
	--- a/bundle.c
	+++ b/bundle.c
	@@ -468,11 +468,6 @@ struct bundle_prerequisites_info {
	 };
	 
	 
	-static void ignore_object(struct object *obj, const char *v, void *data)
	-{
	-	/* Do nothing. */
	-}
	-
	 static void write_bundle_prerequisites(struct commit *commit, void *data)
	 {
	 	struct bundle_prerequisites_info *bpi = data;
	@@ -598,7 +593,7 @@ int create_bundle(struct repository *r, const char *path,
	 	 */
	 	revs.filter = NULL;
	 	revs.blob_objects = revs.tree_objects = 0;
	-	traverse_commit_list(&revs, write_bundle_prerequisites, ignore_object, &bpi);
	+	traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
	 	object_array_remove_duplicates(&revs_copy.pending);
	 
	 	/* write bundle refs */
	diff --git a/list-objects.c b/list-objects.c
	index 9422625b39e..d44a1db2262 100644
	--- a/list-objects.c
	+++ b/list-objects.c
	@@ -227,7 +227,7 @@ static void process_tag(struct traversal_context *ctx,
	 					       ctx->filter);
	 	if (r & LOFR_MARK_SEEN)
	 		tag->object.flags |= SEEN;
	-	if (r & LOFR_DO_SHOW)
	+	if (r & LOFR_DO_SHOW && ctx->show_object)
	 		ctx->show_object(&tag->object, name, ctx->show_data);
	 }
	 

Aside from whether that's a good idea, doesn't that at least point to
missing test coverage here, see traverse_non_commits() and other paths
in list-objects.c that'll call ctx->show_object().

I think an actually sensible patch for this is the below, i.e. the API
is conflating "do show" with "should we show AND we have a callback?":
	
	diff --git a/bundle.c b/bundle.c
	index 3846108f7a6..1f022f53336 100644
	--- a/bundle.c
	+++ b/bundle.c
	@@ -468,11 +468,6 @@ struct bundle_prerequisites_info {
	 };
	 
	 
	-static void ignore_object(struct object *obj, const char *v, void *data)
	-{
	-	/* Do nothing. */
	-}
	-
	 static void write_bundle_prerequisites(struct commit *commit, void *data)
	 {
	 	struct bundle_prerequisites_info *bpi = data;
	@@ -598,7 +593,7 @@ int create_bundle(struct repository *r, const char *path,
	 	 */
	 	revs.filter = NULL;
	 	revs.blob_objects = revs.tree_objects = 0;
	-	traverse_commit_list(&revs, write_bundle_prerequisites, ignore_object, &bpi);
	+	traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
	 	object_array_remove_duplicates(&revs_copy.pending);
	 
	 	/* write bundle refs */
	diff --git a/list-objects.c b/list-objects.c
	index 9422625b39e..1725cb252a9 100644
	--- a/list-objects.c
	+++ b/list-objects.c
	@@ -21,6 +21,22 @@ struct traversal_context {
	 	struct filter *filter;
	 };
	 
	+static void show_commit(struct traversal_context *ctx, struct commit *commit,
	+			void *data)
	+{
	+	if (!ctx->show_commit)
	+		return;
	+	ctx->show_commit(commit, data);
	+}
	+
	+static void show_object(struct traversal_context *ctx, struct object *object,
	+			const char *path, void *data)
	+{
	+	if (!ctx->show_object)
	+		return;
	+	ctx->show_object(object, path, data);
	+}
	+
	 static void process_blob(struct traversal_context *ctx,
	 			 struct blob *blob,
	 			 struct strbuf *path,
	@@ -60,7 +76,7 @@ static void process_blob(struct traversal_context *ctx,
	 	if (r & LOFR_MARK_SEEN)
	 		obj->flags |= SEEN;
	 	if (r & LOFR_DO_SHOW)
	-		ctx->show_object(obj, path->buf, ctx->show_data);
	+		show_object(ctx, obj, path->buf, ctx->show_data);
	 	strbuf_setlen(path, pathlen);
	 }
	 
	@@ -194,7 +210,7 @@ static void process_tree(struct traversal_context *ctx,
	 	if (r & LOFR_MARK_SEEN)
	 		obj->flags |= SEEN;
	 	if (r & LOFR_DO_SHOW)
	-		ctx->show_object(obj, base->buf, ctx->show_data);
	+		show_object(ctx, obj, base->buf, ctx->show_data);
	 	if (base->len)
	 		strbuf_addch(base, '/');
	 
	@@ -210,7 +226,7 @@ static void process_tree(struct traversal_context *ctx,
	 	if (r & LOFR_MARK_SEEN)
	 		obj->flags |= SEEN;
	 	if (r & LOFR_DO_SHOW)
	-		ctx->show_object(obj, base->buf, ctx->show_data);
	+		show_object(ctx, obj, base->buf, ctx->show_data);
	 
	 	strbuf_setlen(base, baselen);
	 	free_tree_buffer(tree);
	@@ -228,7 +244,7 @@ static void process_tag(struct traversal_context *ctx,
	 	if (r & LOFR_MARK_SEEN)
	 		tag->object.flags |= SEEN;
	 	if (r & LOFR_DO_SHOW)
	-		ctx->show_object(&tag->object, name, ctx->show_data);
	+		show_object(ctx, &tag->object, name, ctx->show_data);
	 }
	 
	 static void mark_edge_parents_uninteresting(struct commit *commit,
	@@ -402,7 +418,7 @@ static void do_traverse(struct traversal_context *ctx)
	 		if (r & LOFR_MARK_SEEN)
	 			commit->object.flags |= SEEN;
	 		if (r & LOFR_DO_SHOW)
	-			ctx->show_commit(commit, ctx->show_data);
	+			show_commit(ctx, commit, ctx->show_data);
	 
	 		if (ctx->revs->tree_blobs_in_commit_order)
	 			/*

I think that'll do what you want, and also seems to set us up for safer
API use going forward, i.e. we have a couple of NULL-passing callers
already.
Derrick Stolee March 8, 2022, 1:45 p.m. UTC | #2
On 3/8/2022 4:37 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> Re this comment on v1: https://lore.kernel.org/git/220307.86fsntzsda.gmgdl@evledraar.gmail.com/
> Aside from whether that's a good idea, doesn't that at least point to
> missing test coverage here, see traverse_non_commits() and other paths
> in list-objects.c that'll call ctx->show_object().
> 
> I think an actually sensible patch for this is the below, i.e. the API
> is conflating "do show" with "should we show AND we have a callback?":
...
> I think that'll do what you want, and also seems to set us up for safer
> API use going forward, i.e. we have a couple of NULL-passing callers
> already.

Squashing this change into the commit makes most sense to attribute
authorship to you. May I forge your sign-off in that patch for v3?

Thanks,
-Stolee
Ævar Arnfjörð Bjarmason March 8, 2022, 1:53 p.m. UTC | #3
On Tue, Mar 08 2022, Derrick Stolee wrote:

> On 3/8/2022 4:37 AM, Ævar Arnfjörð Bjarmason wrote:
>> 
>> Re this comment on v1: https://lore.kernel.org/git/220307.86fsntzsda.gmgdl@evledraar.gmail.com/
>> Aside from whether that's a good idea, doesn't that at least point to
>> missing test coverage here, see traverse_non_commits() and other paths
>> in list-objects.c that'll call ctx->show_object().
>> 
>> I think an actually sensible patch for this is the below, i.e. the API
>> is conflating "do show" with "should we show AND we have a callback?":
> ...
>> I think that'll do what you want, and also seems to set us up for safer
>> API use going forward, i.e. we have a couple of NULL-passing callers
>> already.
>
> Squashing this change into the commit makes most sense to attribute
> authorship to you. May I forge your sign-off in that patch for v3?

Sounds good! :)

(Also, for anything inline or throw-away like that that I post on list
it's safe to assume my Signed-off-by)
diff mbox series

Patch

diff --git a/bundle.c b/bundle.c
index a0bb687b0f4..dc56db9a50a 100644
--- a/bundle.c
+++ b/bundle.c
@@ -451,6 +451,12 @@  struct bundle_prerequisites_info {
 	int fd;
 };
 
+
+static void ignore_object(struct object *obj, const char *v, void *data)
+{
+	/* Do nothing. */
+}
+
 static void write_bundle_prerequisites(struct commit *commit, void *data)
 {
 	struct bundle_prerequisites_info *bpi = data;
@@ -544,7 +550,9 @@  int create_bundle(struct repository *r, const char *path,
 		die("revision walk setup failed");
 	bpi.fd = bundle_fd;
 	bpi.pending = &revs_copy.pending;
-	traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi);
+
+	revs.blob_objects = revs.tree_objects = 0;
+	traverse_commit_list(&revs, write_bundle_prerequisites, ignore_object, &bpi);
 	object_array_remove_duplicates(&revs_copy.pending);
 
 	/* write bundle refs */
diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
index b13e8a52a93..6522401617d 100755
--- a/t/t6020-bundle-misc.sh
+++ b/t/t6020-bundle-misc.sh
@@ -475,4 +475,16 @@  test_expect_success 'clone from bundle' '
 	test_cmp expect actual
 '
 
+test_expect_success 'unfiltered bundle with --objects' '
+	git bundle create all-objects.bdl \
+		--all --objects &&
+	git bundle create all.bdl \
+		--all &&
+
+	# Compare the headers of these files.
+	head -11 all.bdl >expect &&
+	head -11 all-objects.bdl >actual &&
+	test_cmp expect actual
+'
+
 test_done