diff mbox series

[v2,08/12] bundle: parse filter capability

Message ID 898a7d945131042c48f8e99acccf26378a4c8586.1646689840.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Partial bundles | expand

Commit Message

Derrick Stolee March 7, 2022, 9:50 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

The v3 bundle format has capabilities, allowing newer versions of Git to
create bundles with newer features. Older versions that do not
understand these new capabilities will fail with a helpful warning.

Create a new capability allowing Git to understand that the contained
pack-file is filtered according to some object filter. Typically, this
filter will be "blob:none" for a blobless partial clone.

This change teaches Git to parse this capability, place its value in the
bundle header, and demonstrate this understanding by adding a message to
'git bundle verify'.

Since we will use gently_parse_list_objects_filter() outside of
list-objects-filter-options.c, make it an external method and move its
API documentation to before its declaration.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Documentation/technical/bundle-format.txt | 11 ++++++++---
 bundle.c                                  | 17 ++++++++++++++++-
 bundle.h                                  |  3 +++
 list-objects-filter-options.c             | 17 +----------------
 list-objects-filter-options.h             | 20 ++++++++++++++++++++
 5 files changed, 48 insertions(+), 20 deletions(-)

Comments

Ævar Arnfjörð Bjarmason March 8, 2022, 9:25 a.m. UTC | #1
On Mon, Mar 07 2022, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <derrickstolee@github.com>
>
> The v3 bundle format has capabilities, allowing newer versions of Git to
> create bundles with newer features. Older versions that do not
> understand these new capabilities will fail with a helpful warning.
>
> Create a new capability allowing Git to understand that the contained
> pack-file is filtered according to some object filter. Typically, this
> filter will be "blob:none" for a blobless partial clone.
>
> This change teaches Git to parse this capability, place its value in the
> bundle header, and demonstrate this understanding by adding a message to
> 'git bundle verify'.
>
> Since we will use gently_parse_list_objects_filter() outside of
> list-objects-filter-options.c, make it an external method and move its
> API documentation to before its declaration.
> [...]
> --- a/bundle.c
> +++ b/bundle.c
> @@ -11,7 +11,7 @@
>  #include "run-command.h"
>  #include "refs.h"
>  #include "strvec.h"
> -
> +#include "list-objects-filter-options.h"
>  
>  static const char v2_bundle_signature[] = "# v2 git bundle\n";
>  static const char v3_bundle_signature[] = "# v3 git bundle\n";
> @@ -33,6 +33,8 @@ void bundle_header_release(struct bundle_header *header)
>  {
>  	string_list_clear(&header->prerequisites, 1);
>  	string_list_clear(&header->references, 1);
> +	list_objects_filter_release(header->filter);
> +	free(header->filter);
>  }
>  
>  static int parse_capability(struct bundle_header *header, const char *capability)
> @@ -45,6 +47,11 @@ static int parse_capability(struct bundle_header *header, const char *capability
>  		header->hash_algo = &hash_algos[algo];
>  		return 0;
>  	}
> +	if (skip_prefix(capability, "filter=", &arg)) {
> +		CALLOC_ARRAY(header->filter, 1);
> +		parse_list_objects_filter(header->filter, arg);
> +		return 0;
> +	}
>  	return error(_("unknown capability '%s'"), capability);
>  }
>  

Re the comment I had on the v1 about embedding this data in the struct
instead:
https://lore.kernel.org/git/220307.86y21lycne.gmgdl@evledraar.gmail.com/

The below diff passes all your tests, i.e. re using NULL as a marker I
think you may have missed that the API already has a LOFC_DISABLED for
this (and grepping reveals similar API use of it).

I'm not 100% sure it's correct, but if it isn't that's also going to
suggest missing test coverage in this series.

In any case you want the BUNDLE_HEADER_INIT change, your version is
buggy in making that header use NODUP strings by hardcoding { 0 }.

diff --git a/builtin/clone.c b/builtin/clone.c
index 52e50f17baf..000379eea7f 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1172,9 +1172,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 	transport->cloning = 1;
 
 	if (is_bundle) {
-		struct bundle_header header = { 0 };
+		struct bundle_header header = BUNDLE_HEADER_INIT;
 		int fd = read_bundle_header(path, &header);
-		int has_filter = !!header.filter;
+		int has_filter = header.filter.choice != LOFC_DISABLED;
 
 		if (fd > 0)
 			close(fd);
diff --git a/bundle.c b/bundle.c
index 9ca6a7eb1c2..3846108f7a6 100644
--- a/bundle.c
+++ b/bundle.c
@@ -33,8 +33,7 @@ void bundle_header_release(struct bundle_header *header)
 {
 	string_list_clear(&header->prerequisites, 1);
 	string_list_clear(&header->references, 1);
-	list_objects_filter_release(header->filter);
-	free(header->filter);
+	list_objects_filter_release(&header->filter);
 }
 
 static int parse_capability(struct bundle_header *header, const char *capability)
@@ -48,8 +47,7 @@ static int parse_capability(struct bundle_header *header, const char *capability
 		return 0;
 	}
 	if (skip_prefix(capability, "filter=", &arg)) {
-		CALLOC_ARRAY(header->filter, 1);
-		parse_list_objects_filter(header->filter, arg);
+		parse_list_objects_filter(&header->filter, arg);
 		return 0;
 	}
 	return error(_("unknown capability '%s'"), capability);
@@ -227,7 +225,7 @@ int verify_bundle(struct repository *r,
 	req_nr = revs.pending.nr;
 	setup_revisions(2, argv, &revs, NULL);
 
-	revs.filter = header->filter;
+	revs.filter = &header->filter;
 
 	if (prepare_revision_walk(&revs))
 		die(_("revision walk setup failed"));
@@ -269,9 +267,9 @@ int verify_bundle(struct repository *r,
 			  r->nr);
 		list_refs(r, 0, NULL);
 
-		if (header->filter) {
+		if (header->filter.choice != LOFC_DISABLED) {
 			printf_ln("The bundle uses this filter: %s",
-				  list_objects_filter_spec(header->filter));
+				  list_objects_filter_spec(&header->filter));
 		}
 
 		r = &header->prerequisites;
@@ -631,7 +629,7 @@ int unbundle(struct repository *r, struct bundle_header *header,
 	strvec_pushl(&ip.args, "index-pack", "--fix-thin", "--stdin", NULL);
 
 	/* If there is a filter, then we need to create the promisor pack. */
-	if (header->filter)
+	if (header->filter.choice != LOFC_DISABLED)
 		strvec_push(&ip.args, "--promisor=from-bundle");
 
 	if (extra_index_pack_args) {
diff --git a/bundle.h b/bundle.h
index eb026153d56..7fef2108f43 100644
--- a/bundle.h
+++ b/bundle.h
@@ -4,15 +4,14 @@
 #include "strvec.h"
 #include "cache.h"
 #include "string-list.h"
-
-struct list_objects_filter_options;
+#include "list-objects-filter-options.h"
 
 struct bundle_header {
 	unsigned version;
 	struct string_list prerequisites;
 	struct string_list references;
 	const struct git_hash_algo *hash_algo;
-	struct list_objects_filter_options *filter;
+	struct list_objects_filter_options filter;
 };
 
 #define BUNDLE_HEADER_INIT \
Derrick Stolee March 8, 2022, 1:43 p.m. UTC | #2
On 3/8/2022 4:25 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Mar 07 2022, Derrick Stolee via GitGitGadget wrote:
> 
>> From: Derrick Stolee <derrickstolee@github.com>
>>
>> The v3 bundle format has capabilities, allowing newer versions of Git to
>> create bundles with newer features. Older versions that do not
>> understand these new capabilities will fail with a helpful warning.
>>
>> Create a new capability allowing Git to understand that the contained
>> pack-file is filtered according to some object filter. Typically, this
>> filter will be "blob:none" for a blobless partial clone.
>>
>> This change teaches Git to parse this capability, place its value in the
>> bundle header, and demonstrate this understanding by adding a message to
>> 'git bundle verify'.
>>
>> Since we will use gently_parse_list_objects_filter() outside of
>> list-objects-filter-options.c, make it an external method and move its
>> API documentation to before its declaration.
>> [...]
>> --- a/bundle.c
>> +++ b/bundle.c
>> @@ -11,7 +11,7 @@
>>  #include "run-command.h"
>>  #include "refs.h"
>>  #include "strvec.h"
>> -
>> +#include "list-objects-filter-options.h"
>>  
>>  static const char v2_bundle_signature[] = "# v2 git bundle\n";
>>  static const char v3_bundle_signature[] = "# v3 git bundle\n";
>> @@ -33,6 +33,8 @@ void bundle_header_release(struct bundle_header *header)
>>  {
>>  	string_list_clear(&header->prerequisites, 1);
>>  	string_list_clear(&header->references, 1);
>> +	list_objects_filter_release(header->filter);
>> +	free(header->filter);
>>  }
>>  
>>  static int parse_capability(struct bundle_header *header, const char *capability)
>> @@ -45,6 +47,11 @@ static int parse_capability(struct bundle_header *header, const char *capability
>>  		header->hash_algo = &hash_algos[algo];
>>  		return 0;
>>  	}
>> +	if (skip_prefix(capability, "filter=", &arg)) {
>> +		CALLOC_ARRAY(header->filter, 1);
>> +		parse_list_objects_filter(header->filter, arg);
>> +		return 0;
>> +	}
>>  	return error(_("unknown capability '%s'"), capability);
>>  }
>>  
> 
> Re the comment I had on the v1 about embedding this data in the struct
> instead:
> https://lore.kernel.org/git/220307.86y21lycne.gmgdl@evledraar.gmail.com/
> 
> The below diff passes all your tests, i.e. re using NULL as a marker I
> think you may have missed that the API already has a LOFC_DISABLED for
> this (and grepping reveals similar API use of it).

I did miss this LOFC_DISABLED use, which must be the correct way to
interpret an "empty" filter set (re: earlier concerns that a .nr == 0
was used as a BUG() statement in some places).

> I'm not 100% sure it's correct, but if it isn't that's also going to
> suggest missing test coverage in this series.
> 
> In any case you want the BUNDLE_HEADER_INIT change, your version is
> buggy in making that header use NODUP strings by hardcoding { 0 }.

Thanks for pointing this out.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Documentation/technical/bundle-format.txt b/Documentation/technical/bundle-format.txt
index bac558d049a..b9be8644cf5 100644
--- a/Documentation/technical/bundle-format.txt
+++ b/Documentation/technical/bundle-format.txt
@@ -71,6 +71,11 @@  and the Git bundle v2 format cannot represent a shallow clone repository.
 == Capabilities
 
 Because there is no opportunity for negotiation, unknown capabilities cause 'git
-bundle' to abort.  The only known capability is `object-format`, which specifies
-the hash algorithm in use, and can take the same values as the
-`extensions.objectFormat` configuration value.
+bundle' to abort.
+
+* `object-format` specifies the hash algorithm in use, and can take the same
+  values as the `extensions.objectFormat` configuration value.
+
+* `filter` specifies an object filter as in the `--filter` option in
+  linkgit:git-rev-list[1]. The resulting pack-file must be marked as a
+  `.promisor` pack-file after it is unbundled.
diff --git a/bundle.c b/bundle.c
index dc56db9a50a..2afced4d991 100644
--- a/bundle.c
+++ b/bundle.c
@@ -11,7 +11,7 @@ 
 #include "run-command.h"
 #include "refs.h"
 #include "strvec.h"
-
+#include "list-objects-filter-options.h"
 
 static const char v2_bundle_signature[] = "# v2 git bundle\n";
 static const char v3_bundle_signature[] = "# v3 git bundle\n";
@@ -33,6 +33,8 @@  void bundle_header_release(struct bundle_header *header)
 {
 	string_list_clear(&header->prerequisites, 1);
 	string_list_clear(&header->references, 1);
+	list_objects_filter_release(header->filter);
+	free(header->filter);
 }
 
 static int parse_capability(struct bundle_header *header, const char *capability)
@@ -45,6 +47,11 @@  static int parse_capability(struct bundle_header *header, const char *capability
 		header->hash_algo = &hash_algos[algo];
 		return 0;
 	}
+	if (skip_prefix(capability, "filter=", &arg)) {
+		CALLOC_ARRAY(header->filter, 1);
+		parse_list_objects_filter(header->filter, arg);
+		return 0;
+	}
 	return error(_("unknown capability '%s'"), capability);
 }
 
@@ -220,6 +227,8 @@  int verify_bundle(struct repository *r,
 	req_nr = revs.pending.nr;
 	setup_revisions(2, argv, &revs, NULL);
 
+	revs.filter = header->filter;
+
 	if (prepare_revision_walk(&revs))
 		die(_("revision walk setup failed"));
 
@@ -259,6 +268,12 @@  int verify_bundle(struct repository *r,
 			     r->nr),
 			  r->nr);
 		list_refs(r, 0, NULL);
+
+		if (header->filter) {
+			printf_ln("The bundle uses this filter: %s",
+				  list_objects_filter_spec(header->filter));
+		}
+
 		r = &header->prerequisites;
 		if (!r->nr) {
 			printf_ln(_("The bundle records a complete history."));
diff --git a/bundle.h b/bundle.h
index 06009fe6b1f..eb026153d56 100644
--- a/bundle.h
+++ b/bundle.h
@@ -5,11 +5,14 @@ 
 #include "cache.h"
 #include "string-list.h"
 
+struct list_objects_filter_options;
+
 struct bundle_header {
 	unsigned version;
 	struct string_list prerequisites;
 	struct string_list references;
 	const struct git_hash_algo *hash_algo;
+	struct list_objects_filter_options *filter;
 };
 
 #define BUNDLE_HEADER_INIT \
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index fd8d59f653a..d8597cdee36 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -40,22 +40,7 @@  const char *list_object_filter_config_name(enum list_objects_filter_choice c)
 	BUG("list_object_filter_config_name: invalid argument '%d'", c);
 }
 
-/*
- * Parse value of the argument to the "filter" keyword.
- * On the command line this looks like:
- *       --filter=<arg>
- * and in the pack protocol as:
- *       "filter" SP <arg>
- *
- * The filter keyword will be used by many commands.
- * See Documentation/rev-list-options.txt for allowed values for <arg>.
- *
- * Capture the given arg as the "filter_spec".  This can be forwarded to
- * subordinate commands when necessary (although it's better to pass it through
- * expand_list_objects_filter_spec() first).  We also "intern" the arg for the
- * convenience of the current command.
- */
-static int gently_parse_list_objects_filter(
+int gently_parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg,
 	struct strbuf *errbuf)
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index da5b6737e27..f6fe6a3d2ca 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -72,6 +72,26 @@  struct list_objects_filter_options {
 /* Normalized command line arguments */
 #define CL_ARG__FILTER "filter"
 
+/*
+ * Parse value of the argument to the "filter" keyword.
+ * On the command line this looks like:
+ *       --filter=<arg>
+ * and in the pack protocol as:
+ *       "filter" SP <arg>
+ *
+ * The filter keyword will be used by many commands.
+ * See Documentation/rev-list-options.txt for allowed values for <arg>.
+ *
+ * Capture the given arg as the "filter_spec".  This can be forwarded to
+ * subordinate commands when necessary (although it's better to pass it through
+ * expand_list_objects_filter_spec() first).  We also "intern" the arg for the
+ * convenience of the current command.
+ */
+int gently_parse_list_objects_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf);
+
 void list_objects_filter_die_if_populated(
 	struct list_objects_filter_options *filter_options);