diff mbox series

[v2,31/39] bundle: add new version for use with SHA-256

Message ID 20200713024909.3714837-32-sandals@crustytoothpaste.net (mailing list archive)
State New, archived
Headers show
Series SHA-256, part 3/3 | expand

Commit Message

brian m. carlson July 13, 2020, 2:49 a.m. UTC
Currently we detect the hash algorithm in use by the length of the
object ID.  This is inelegant and prevents us from using a different
hash algorithm that is also 256 bits in length.

Since we cannot extend the v2 format in a backward-compatible way, let's
add a v3 format, which is identical, except for the addition of
capabilities, which are prefixed by an at sign.  We add "object-format"
as the only capability and reject unknown capabilities, since we do not
have a network connection and therefore cannot negotiate with the other
side.

For compatibility, default to the v2 format for SHA-1 and require v3
for SHA-256.

In t5510, always use format v3 so we can be sure we produce consistent
results across hash algorithms.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 Documentation/git-bundle.txt              |  9 ++-
 Documentation/technical/bundle-format.txt | 30 +++++++++-
 builtin/bundle.c                          |  5 +-
 bundle.c                                  | 69 +++++++++++++++--------
 bundle.h                                  |  4 +-
 t/t5510-fetch.sh                          |  9 +--
 t/t5607-clone-bundle.sh                   | 27 +++++++++
 7 files changed, 122 insertions(+), 31 deletions(-)

Comments

Junio C Hamano July 13, 2020, 4:26 a.m. UTC | #1
"brian m. carlson" <sandals@crustytoothpaste.net> writes:

> Currently we detect the hash algorithm in use by the length of the
> object ID.  This is inelegant and prevents us from using a different
> hash algorithm that is also 256 bits in length.
>
> Since we cannot extend the v2 format in a backward-compatible way, let's
> add a v3 format, which is identical, except for the addition of
> capabilities, which are prefixed by an at sign.  We add "object-format"
> as the only capability and reject unknown capabilities, since we do not
> have a network connection and therefore cannot negotiate with the other
> side.
>
> For compatibility, default to the v2 format for SHA-1 and require v3
> for SHA-256.
>
> In t5510, always use format v3 so we can be sure we produce consistent
> results across hash algorithms.
>
> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---

Wow, that was quick ;-)

> +capability   = "@" key ["=" value] LF
> +key          = 1*(ALPHA / DIGIT / "-")
> +value        = *(%01-09 / %0b-FF)

OK.

> --- a/builtin/bundle.c
> +++ b/builtin/bundle.c
> @@ -60,6 +60,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
>  	int all_progress_implied = 0;
>  	int progress = isatty(STDERR_FILENO);
>  	struct argv_array pack_opts;
> +	int version = -1;
>  
>  	struct option options[] = {
>  		OPT_SET_INT('q', "quiet", &progress,
> @@ -71,6 +72,8 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
>  		OPT_BOOL(0, "all-progress-implied",
>  			 &all_progress_implied,
>  			 N_("similar to --all-progress when progress meter is shown")),
> +		OPT_INTEGER(0, "version", &version,
> +			    N_("specify bundle format version")),
>  		OPT_END()
>  	};
>  	const char* bundle_file;
> @@ -91,7 +94,7 @@ static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
>  
>  	if (!startup_info->have_repository)
>  		die(_("Need a repository to create a bundle."));
> -	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts);
> +	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);

OK, so the builtin command itself does not know how the default
version is picked; it is left to the code in bundle.c which is at a
lower layer.  Makes sense.

> diff --git a/bundle.c b/bundle.c
> index 2a0d744d3f..5636ea5f7c 100644
> --- a/bundle.c
> +++ b/bundle.c
> @@ -12,7 +12,8 @@
>  #include "refs.h"
>  #include "argv-array.h"
>  
> -static const char bundle_signature[] = "# v2 git bundle\n";
> +static const char v2_bundle_signature[] = "# v2 git bundle\n";
> +static const char v3_bundle_signature[] = "# v3 git bundle\n";
>  
>  static void add_to_ref_list(const struct object_id *oid, const char *name,
>  		struct ref_list *list)
> @@ -23,17 +24,20 @@ static void add_to_ref_list(const struct object_id *oid, const char *name,
>  	list->nr++;
>  }
>  
> -static const struct git_hash_algo *detect_hash_algo(struct strbuf *buf)
> +static int parse_capability(struct bundle_header *header, const char *capability)
>  {
> +	const char *arg;
> +	if (skip_prefix(capability, "object-format=", &arg)) {
> +		int algo = hash_algo_by_name(arg);
> +		if (algo == GIT_HASH_UNKNOWN)
> +			return error(_("unable to detect hash algorithm"));
> +		header->hash_algo = &hash_algos[algo];
> +		return 0;
> +	}
> +	return error(_("unknown capability '%s'"), capability);

This certainly enforces the earlier EBNF we saw on the value side by
the caller stopping at the first LF and feeding the bytes before it
to the function, but there is no sanity checking for the key part.

In a sense, there is no need to. As we always reject what is unknown
to us, the only thing we need is to ensure that skip_prefix() we use
here are the syntactically correct key followed by '='.

>  }
>  
> +
>  static int parse_bundle_header(int fd, struct bundle_header *header,
>  			       const char *report_path)
>  {

I do not particularly mind two blank lines between functions but
let's be consistent, and this patch is not the place to make that
change.

> @@ -42,14 +46,18 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
>  
>  	/* The bundle header begins with the signature */
>  	if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
> -	    strcmp(buf.buf, bundle_signature)) {
> +	    (strcmp(buf.buf, v2_bundle_signature) &&
> +	     strcmp(buf.buf, v3_bundle_signature))) {
>  		if (report_path)
> -			error(_("'%s' does not look like a v2 bundle file"),
> +			error(_("'%s' does not look like a v2 or v3 bundle file"),
>  			      report_path);
>  		status = -1;
>  		goto abort;
>  	}

OK.

> +	header->version = buf.buf[3] - '0';

Hmmmmm.  So in addition to the vN_bundle_signature[], the code also
knows where the '2' and '3' are?  It may be overkill to have a
helper function just for this but

	if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
	    parse_bundle_signature(buf.buf, header)) {
		... does not look like a supported bundle file ...
	}

with

	static struct {
		int version;
	        const char *signature;
	} bundle_sigs[] = {
		{ 2, "# v2 git bundle\n" },
		{ 3, "# v3 git bundle\n" },
	};

	static int parse_bundle_signature(const char *s, struct bundle_header *h)
	{
		int i;

                for (i = 0; i < ARRAY_SIZE(bundle_sigs); i++) {
			if (!strcmp(s, bundle_sigs[i].signature) {
				h->version = bundle_sigs[i].version;
				return 0;
			}
		}
		return -1;
	}

or something like that?

> +	header->hash_algo = the_hash_algo;
> +
>  	/* The bundle header ends with an empty line */
>  	while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
>  	       buf.len && buf.buf[0] != '\n') {
> @@ -57,21 +65,21 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
>  		int is_prereq = 0;
>  		const char *p;
>  
> +		if (header->version == 3 && *buf.buf == '@') {
> +			buf.buf[buf.len - 1] = '\0';

This is chomping the final LF.  I know the current code does not
care, but after this, buf.len is inconsistent with buf.buf in that
the NUL we placed here is not part of the payload on the line.
Doing strbuf_rtrim(&buf) is certainly too much (it also drops
trailing whitespaces), but it somewhat feels disturbing.  It might
be sufficient to say

			buf.buf[--buf.len] = '\0';

to make it palatable.  I dunno.

> +			if (parse_capability(header, buf.buf + 1)) {
> +				status = -1;
> +				break;
> +			}
> +			continue;
> +		}
> +
>  		if (*buf.buf == '-') {
>  			is_prereq = 1;
>  			strbuf_remove(&buf, 0, 1);
>  		}
>  		strbuf_rtrim(&buf);
>  
> -		if (!header->hash_algo) {
> -			header->hash_algo = detect_hash_algo(&buf);
> -			if (!header->hash_algo) {
> -				error(_("unknown hash algorithm length"));
> -				status = -1;
> -				break;
> -			}
> -		}
> -
>  		/*
>  		 * Tip lines have object name, SP, and refname.
>  		 * Prerequisites have object name that is optionally
> @@ -449,13 +457,14 @@ static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
>  }
>  
>  int create_bundle(struct repository *r, const char *path,
> -		  int argc, const char **argv, struct argv_array *pack_options)
> +		  int argc, const char **argv, struct argv_array *pack_options, int version)
>  {
>  	struct lock_file lock = LOCK_INIT;
>  	int bundle_fd = -1;
>  	int bundle_to_stdout;
>  	int ref_count = 0;
>  	struct rev_info revs;
> +	int default_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3;
>  
>  	bundle_to_stdout = !strcmp(path, "-");
>  	if (bundle_to_stdout)
> @@ -464,8 +473,22 @@ int create_bundle(struct repository *r, const char *path,
>  		bundle_fd = hold_lock_file_for_update(&lock, path,
>  						      LOCK_DIE_ON_ERROR);
>  
> -	/* write signature */
> -	write_or_die(bundle_fd, bundle_signature, strlen(bundle_signature));
> +	if (version == -1)
> +		version = default_version;
> +
> +	if (version < 2 || version > 3) {
> +		die(_("unsupported bundle version %d"), version);
> +	} else if (version < default_version) {
> +		die(_("cannot write bundle version %d with algorithm %s"), version, the_hash_algo->name);
> +	} else if (version == 2) {
> +		write_or_die(bundle_fd, v2_bundle_signature, strlen(v2_bundle_signature));
> +	} else {
> +		const char *capability = "@object-format=";
> +		write_or_die(bundle_fd, v3_bundle_signature, strlen(v3_bundle_signature));
> +		write_or_die(bundle_fd, capability, strlen(capability));
> +		write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name));
> +		write_or_die(bundle_fd, "\n", 1);
> +	}

Quite straight-forward.

> diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
> index a66dbe0bde..9284b64e7e 100755
> --- a/t/t5510-fetch.sh
> +++ b/t/t5510-fetch.sh
> @@ -281,15 +281,16 @@ test_expect_success 'create bundle 1' '
>  	cd "$D" &&
>  	echo >file updated again by origin &&
>  	git commit -a -m "tip" &&
> -	git bundle create bundle1 master^..master
> +	git bundle create --version=3 bundle1 master^..master
>  '
>  
>  test_expect_success 'header of bundle looks right' '
>  	head -n 4 "$D"/bundle1 &&
>  	head -n 1 "$D"/bundle1 | grep "^#" &&
> -	head -n 2 "$D"/bundle1 | grep "^-$OID_REGEX " &&
> -	head -n 3 "$D"/bundle1 | grep "^$OID_REGEX " &&
> -	head -n 4 "$D"/bundle1 | grep "^$"
> +	head -n 2 "$D"/bundle1 | grep "^@object-format=" &&
> +	head -n 3 "$D"/bundle1 | grep "^-$OID_REGEX " &&
> +	head -n 4 "$D"/bundle1 | grep "^$OID_REGEX " &&
> +	head -n 5 "$D"/bundle1 | grep "^$"
>  '

This is not new, but because "head -n $n" is not "look at the $n-th
line", but "look at the first $n lines", the construct look somewhat
bogus.  Wouldn't we want something more like

	sed -n -e 1p "$D/bundle1" | grep "^#" &&
	sed -n -e 2p "$D/bundle1" | grep "^@object-format=" &&
	...

I wonder.

> +test_expect_success 'git bundle uses expected default format' '
> +	git bundle create bundle HEAD^.. &&
> +	head -n1 bundle | grep "^# v$(test_oid version) git bundle$"
> +'

Nice.

> +test_expect_success 'git bundle v3 has expected contents' '
> +	git branch side HEAD &&
> +	git bundle create --version=3 bundle HEAD^..side &&
> +	head -n2 bundle >actual &&
> +	echo "# v3 git bundle" >expected &&
> +	echo "@object-format=$(test_oid algo)" >>expected &&

Rather than two separate echo,

	cat >expect <<-EOF &&
	# v3 git bundle
	@object-format=$(test_oid algo)
	EOF

might be more direct and easier to see what is expected.

> +	test_cmp actual expected &&
> +	git bundle verify bundle
> +'
> +
> +test_expect_success 'git bundle v3 rejects unknown extensions' '
> +	head -n2 bundle >new &&
> +	echo "@unknown=silly" >>new &&
> +	sed "1,2d" >>new &&

What are we feeding this sed?  You meant to feed 'bundle'?

> +	test_must_fail git bundle verify new 2>output &&
> +	grep "unknown capability .unknown=silly." output
> +'
> +
>  test_done
SZEDER Gábor July 13, 2020, 3:56 p.m. UTC | #2
On Mon, Jul 13, 2020 at 02:49:01AM +0000, brian m. carlson wrote:
> @@ -23,17 +24,20 @@ static void add_to_ref_list(const struct object_id *oid, const char *name,
>  	list->nr++;
>  }
>  
> -static const struct git_hash_algo *detect_hash_algo(struct strbuf *buf)
> +static int parse_capability(struct bundle_header *header, const char *capability)
>  {
> -	size_t len = strcspn(buf->buf, " \n");
> -	int algo;
> -
> -	algo = hash_algo_by_length(len / 2);
> -	if (algo == GIT_HASH_UNKNOWN)
> -		return NULL;
> -	return &hash_algos[algo];
> +	const char *arg;
> +	if (skip_prefix(capability, "object-format=", &arg)) {
> +		int algo = hash_algo_by_name(arg);
> +		if (algo == GIT_HASH_UNKNOWN)
> +			return error(_("unable to detect hash algorithm"));
> +		header->hash_algo = &hash_algos[algo];
> +		return 0;
> +	}
> +	return error(_("unknown capability '%s'"), capability);
>  }


> +test_expect_success 'git bundle v3 rejects unknown extensions' '
> +	head -n2 bundle >new &&
> +	echo "@unknown=silly" >>new &&
> +	sed "1,2d" >>new &&
> +	test_must_fail git bundle verify new 2>output &&
> +	grep "unknown capability .unknown=silly." output

This "unknown capability" error message is translated, so it should be
checked with 'test_i18ngrep'.

> +'
> +
>  test_done
Johannes Schindelin July 15, 2020, 7:42 p.m. UTC | #3
Hi,

On Mon, 13 Jul 2020, SZEDER Gábor wrote:

> On Mon, Jul 13, 2020 at 02:49:01AM +0000, brian m. carlson wrote:
> > @@ -23,17 +24,20 @@ static void add_to_ref_list(const struct object_id *oid, const char *name,
> >  	list->nr++;
> >  }
> >
> > -static const struct git_hash_algo *detect_hash_algo(struct strbuf *buf)
> > +static int parse_capability(struct bundle_header *header, const char *capability)
> >  {
> > -	size_t len = strcspn(buf->buf, " \n");
> > -	int algo;
> > -
> > -	algo = hash_algo_by_length(len / 2);
> > -	if (algo == GIT_HASH_UNKNOWN)
> > -		return NULL;
> > -	return &hash_algos[algo];
> > +	const char *arg;
> > +	if (skip_prefix(capability, "object-format=", &arg)) {
> > +		int algo = hash_algo_by_name(arg);
> > +		if (algo == GIT_HASH_UNKNOWN)
> > +			return error(_("unable to detect hash algorithm"));
> > +		header->hash_algo = &hash_algos[algo];
> > +		return 0;
> > +	}
> > +	return error(_("unknown capability '%s'"), capability);
> >  }
>
>
> > +test_expect_success 'git bundle v3 rejects unknown extensions' '
> > +	head -n2 bundle >new &&
> > +	echo "@unknown=silly" >>new &&
> > +	sed "1,2d" >>new &&
> > +	test_must_fail git bundle verify new 2>output &&
> > +	grep "unknown capability .unknown=silly." output
>
> This "unknown capability" error message is translated, so it should be
> checked with 'test_i18ngrep'.

In other words, this patch (which makes things work over here):

-- snipsnap --
Subject: [PATCH] fixup! bundle: add new version for use with SHA-256

---
 t/t5607-clone-bundle.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t5607-clone-bundle.sh b/t/t5607-clone-bundle.sh
index 4a2a3968cc1..ca4efd88d4a 100755
--- a/t/t5607-clone-bundle.sh
+++ b/t/t5607-clone-bundle.sh
@@ -118,7 +118,7 @@ test_expect_success 'git bundle v3 rejects unknown extensions' '
 	echo "@unknown=silly" >>new &&
 	sed "1,2d" >>new &&
 	test_must_fail git bundle verify new 2>output &&
-	grep "unknown capability .unknown=silly." output
+	test_i18ngrep "unknown capability .unknown=silly." output
 '

 test_done
--
2.26.0.windows.1
Junio C Hamano July 15, 2020, 8:02 p.m. UTC | #4
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> This "unknown capability" error message is translated, so it should be
>> checked with 'test_i18ngrep'.
>
> In other words, this patch (which makes things work over here):

Thanks.

>
> -- snipsnap --
> Subject: [PATCH] fixup! bundle: add new version for use with SHA-256
>
> ---
>  t/t5607-clone-bundle.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/t/t5607-clone-bundle.sh b/t/t5607-clone-bundle.sh
> index 4a2a3968cc1..ca4efd88d4a 100755
> --- a/t/t5607-clone-bundle.sh
> +++ b/t/t5607-clone-bundle.sh
> @@ -118,7 +118,7 @@ test_expect_success 'git bundle v3 rejects unknown extensions' '
>  	echo "@unknown=silly" >>new &&
>  	sed "1,2d" >>new &&
>  	test_must_fail git bundle verify new 2>output &&
> -	grep "unknown capability .unknown=silly." output
> +	test_i18ngrep "unknown capability .unknown=silly." output
>  '
>
>  test_done
> --
> 2.26.0.windows.1
diff mbox series

Patch

diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt
index d34b0964be..53804cad4b 100644
--- a/Documentation/git-bundle.txt
+++ b/Documentation/git-bundle.txt
@@ -9,7 +9,8 @@  git-bundle - Move objects and refs by archive
 SYNOPSIS
 --------
 [verse]
-'git bundle' create [-q | --quiet | --progress | --all-progress] [--all-progress-implied] <file> <git-rev-list-args>
+'git bundle' create [-q | --quiet | --progress | --all-progress] [--all-progress-implied]
+		    [--version=<version>] <file> <git-rev-list-args>
 'git bundle' verify [-q | --quiet] <file>
 'git bundle' list-heads <file> [<refname>...]
 'git bundle' unbundle <file> [<refname>...]
@@ -102,6 +103,12 @@  unbundle <file>::
 	is activated.  Unlike --all-progress this flag doesn't actually
 	force any progress display by itself.
 
+--version=<version>::
+	Specify the bundle version.  Version 2 is the older format and can only be
+	used with SHA-1 repositories; the newer version 3 contains capabilities that
+	permit extensions. The default is the oldest supported format, based on the
+	hash algorithm in use.
+
 -q::
 --quiet::
 	This flag makes the command not to report its progress
diff --git a/Documentation/technical/bundle-format.txt b/Documentation/technical/bundle-format.txt
index 0e828151a5..01b76e6be6 100644
--- a/Documentation/technical/bundle-format.txt
+++ b/Documentation/technical/bundle-format.txt
@@ -7,6 +7,8 @@  The Git bundle format is a format that represents both refs and Git objects.
 We will use ABNF notation to define the Git bundle format. See
 protocol-common.txt for the details.
 
+A v2 bundle looks like this:
+
 ----
 bundle    = signature *prerequisite *reference LF pack
 signature = "# v2 git bundle" LF
@@ -18,9 +20,28 @@  reference    = obj-id SP refname LF
 pack         = ... ; packfile
 ----
 
+A v3 bundle looks like this:
+
+----
+bundle    = signature *capability *prerequisite *reference LF pack
+signature = "# v3 git bundle" LF
+
+capability   = "@" key ["=" value] LF
+prerequisite = "-" obj-id SP comment LF
+comment      = *CHAR
+reference    = obj-id SP refname LF
+key          = 1*(ALPHA / DIGIT / "-")
+value        = *(%01-09 / %0b-FF)
+
+pack         = ... ; packfile
+----
+
 == Semantics
 
-A Git bundle consists of three parts.
+A Git bundle consists of four parts.
+
+* "Capabilities", which are only in the v3 format, indicate functionality that
+	the bundle requires to be read properly.
 
 * "Prerequisites" lists the objects that are NOT included in the bundle and the
   reader of the bundle MUST already have, in order to use the data in the
@@ -46,3 +67,10 @@  put any string here. The reader of the bundle MUST ignore the comment.
 Note that the prerequisites does not represent a shallow-clone boundary. The
 semantics of the prerequisites and the shallow-clone boundaries are different,
 and the Git bundle v2 format cannot represent a shallow clone repository.
+
+== Capabilities
+
+Because there is no opportunity for negotiation, unknown capabilities cause 'git
+bundle' to abort.  The only known capability is `object-format`, which specifies
+the hash algorithm in use, and can take the same values as the
+`extensions.objectFormat` configuration value.
diff --git a/builtin/bundle.c b/builtin/bundle.c
index f049d27a14..e1a85e7dcc 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -60,6 +60,7 @@  static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 	int all_progress_implied = 0;
 	int progress = isatty(STDERR_FILENO);
 	struct argv_array pack_opts;
+	int version = -1;
 
 	struct option options[] = {
 		OPT_SET_INT('q', "quiet", &progress,
@@ -71,6 +72,8 @@  static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 		OPT_BOOL(0, "all-progress-implied",
 			 &all_progress_implied,
 			 N_("similar to --all-progress when progress meter is shown")),
+		OPT_INTEGER(0, "version", &version,
+			    N_("specify bundle format version")),
 		OPT_END()
 	};
 	const char* bundle_file;
@@ -91,7 +94,7 @@  static int cmd_bundle_create(int argc, const char **argv, const char *prefix) {
 
 	if (!startup_info->have_repository)
 		die(_("Need a repository to create a bundle."));
-	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts);
+	return !!create_bundle(the_repository, bundle_file, argc, argv, &pack_opts, version);
 }
 
 static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
diff --git a/bundle.c b/bundle.c
index 2a0d744d3f..5636ea5f7c 100644
--- a/bundle.c
+++ b/bundle.c
@@ -12,7 +12,8 @@ 
 #include "refs.h"
 #include "argv-array.h"
 
-static const char bundle_signature[] = "# v2 git bundle\n";
+static const char v2_bundle_signature[] = "# v2 git bundle\n";
+static const char v3_bundle_signature[] = "# v3 git bundle\n";
 
 static void add_to_ref_list(const struct object_id *oid, const char *name,
 		struct ref_list *list)
@@ -23,17 +24,20 @@  static void add_to_ref_list(const struct object_id *oid, const char *name,
 	list->nr++;
 }
 
-static const struct git_hash_algo *detect_hash_algo(struct strbuf *buf)
+static int parse_capability(struct bundle_header *header, const char *capability)
 {
-	size_t len = strcspn(buf->buf, " \n");
-	int algo;
-
-	algo = hash_algo_by_length(len / 2);
-	if (algo == GIT_HASH_UNKNOWN)
-		return NULL;
-	return &hash_algos[algo];
+	const char *arg;
+	if (skip_prefix(capability, "object-format=", &arg)) {
+		int algo = hash_algo_by_name(arg);
+		if (algo == GIT_HASH_UNKNOWN)
+			return error(_("unable to detect hash algorithm"));
+		header->hash_algo = &hash_algos[algo];
+		return 0;
+	}
+	return error(_("unknown capability '%s'"), capability);
 }
 
+
 static int parse_bundle_header(int fd, struct bundle_header *header,
 			       const char *report_path)
 {
@@ -42,14 +46,18 @@  static int parse_bundle_header(int fd, struct bundle_header *header,
 
 	/* The bundle header begins with the signature */
 	if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
-	    strcmp(buf.buf, bundle_signature)) {
+	    (strcmp(buf.buf, v2_bundle_signature) &&
+	     strcmp(buf.buf, v3_bundle_signature))) {
 		if (report_path)
-			error(_("'%s' does not look like a v2 bundle file"),
+			error(_("'%s' does not look like a v2 or v3 bundle file"),
 			      report_path);
 		status = -1;
 		goto abort;
 	}
 
+	header->version = buf.buf[3] - '0';
+	header->hash_algo = the_hash_algo;
+
 	/* The bundle header ends with an empty line */
 	while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
 	       buf.len && buf.buf[0] != '\n') {
@@ -57,21 +65,21 @@  static int parse_bundle_header(int fd, struct bundle_header *header,
 		int is_prereq = 0;
 		const char *p;
 
+		if (header->version == 3 && *buf.buf == '@') {
+			buf.buf[buf.len - 1] = '\0';
+			if (parse_capability(header, buf.buf + 1)) {
+				status = -1;
+				break;
+			}
+			continue;
+		}
+
 		if (*buf.buf == '-') {
 			is_prereq = 1;
 			strbuf_remove(&buf, 0, 1);
 		}
 		strbuf_rtrim(&buf);
 
-		if (!header->hash_algo) {
-			header->hash_algo = detect_hash_algo(&buf);
-			if (!header->hash_algo) {
-				error(_("unknown hash algorithm length"));
-				status = -1;
-				break;
-			}
-		}
-
 		/*
 		 * Tip lines have object name, SP, and refname.
 		 * Prerequisites have object name that is optionally
@@ -449,13 +457,14 @@  static int write_bundle_refs(int bundle_fd, struct rev_info *revs)
 }
 
 int create_bundle(struct repository *r, const char *path,
-		  int argc, const char **argv, struct argv_array *pack_options)
+		  int argc, const char **argv, struct argv_array *pack_options, int version)
 {
 	struct lock_file lock = LOCK_INIT;
 	int bundle_fd = -1;
 	int bundle_to_stdout;
 	int ref_count = 0;
 	struct rev_info revs;
+	int default_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3;
 
 	bundle_to_stdout = !strcmp(path, "-");
 	if (bundle_to_stdout)
@@ -464,8 +473,22 @@  int create_bundle(struct repository *r, const char *path,
 		bundle_fd = hold_lock_file_for_update(&lock, path,
 						      LOCK_DIE_ON_ERROR);
 
-	/* write signature */
-	write_or_die(bundle_fd, bundle_signature, strlen(bundle_signature));
+	if (version == -1)
+		version = default_version;
+
+	if (version < 2 || version > 3) {
+		die(_("unsupported bundle version %d"), version);
+	} else if (version < default_version) {
+		die(_("cannot write bundle version %d with algorithm %s"), version, the_hash_algo->name);
+	} else if (version == 2) {
+		write_or_die(bundle_fd, v2_bundle_signature, strlen(v2_bundle_signature));
+	} else {
+		const char *capability = "@object-format=";
+		write_or_die(bundle_fd, v3_bundle_signature, strlen(v3_bundle_signature));
+		write_or_die(bundle_fd, capability, strlen(capability));
+		write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name));
+		write_or_die(bundle_fd, "\n", 1);
+	}
 
 	/* init revs to list objects for pack-objects later */
 	save_commit_buffer = 0;
diff --git a/bundle.h b/bundle.h
index 2dc9442024..70c84cab08 100644
--- a/bundle.h
+++ b/bundle.h
@@ -13,6 +13,7 @@  struct ref_list {
 };
 
 struct bundle_header {
+	unsigned version;
 	struct ref_list prerequisites;
 	struct ref_list references;
 	const struct git_hash_algo *hash_algo;
@@ -21,7 +22,8 @@  struct bundle_header {
 int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct repository *r, const char *path,
-		  int argc, const char **argv, struct argv_array *pack_options);
+		  int argc, const char **argv, struct argv_array *pack_options,
+		  int version);
 int verify_bundle(struct repository *r, struct bundle_header *header, int verbose);
 #define BUNDLE_VERBOSE 1
 int unbundle(struct repository *r, struct bundle_header *header,
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index a66dbe0bde..9284b64e7e 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -281,15 +281,16 @@  test_expect_success 'create bundle 1' '
 	cd "$D" &&
 	echo >file updated again by origin &&
 	git commit -a -m "tip" &&
-	git bundle create bundle1 master^..master
+	git bundle create --version=3 bundle1 master^..master
 '
 
 test_expect_success 'header of bundle looks right' '
 	head -n 4 "$D"/bundle1 &&
 	head -n 1 "$D"/bundle1 | grep "^#" &&
-	head -n 2 "$D"/bundle1 | grep "^-$OID_REGEX " &&
-	head -n 3 "$D"/bundle1 | grep "^$OID_REGEX " &&
-	head -n 4 "$D"/bundle1 | grep "^$"
+	head -n 2 "$D"/bundle1 | grep "^@object-format=" &&
+	head -n 3 "$D"/bundle1 | grep "^-$OID_REGEX " &&
+	head -n 4 "$D"/bundle1 | grep "^$OID_REGEX " &&
+	head -n 5 "$D"/bundle1 | grep "^$"
 '
 
 test_expect_success 'create bundle 2' '
diff --git a/t/t5607-clone-bundle.sh b/t/t5607-clone-bundle.sh
index 6d5a977fcb..4a2a3968cc 100755
--- a/t/t5607-clone-bundle.sh
+++ b/t/t5607-clone-bundle.sh
@@ -4,6 +4,10 @@  test_description='some bundle related tests'
 . ./test-lib.sh
 
 test_expect_success 'setup' '
+	test_oid_cache <<-EOF &&
+	version sha1:2
+	version sha256:3
+	EOF
 	test_commit initial &&
 	test_tick &&
 	git tag -m tag tag &&
@@ -94,4 +98,27 @@  test_expect_success 'fetch SHA-1 from bundle' '
 	git fetch --no-tags foo/tip.bundle "$(cat hash)"
 '
 
+test_expect_success 'git bundle uses expected default format' '
+	git bundle create bundle HEAD^.. &&
+	head -n1 bundle | grep "^# v$(test_oid version) git bundle$"
+'
+
+test_expect_success 'git bundle v3 has expected contents' '
+	git branch side HEAD &&
+	git bundle create --version=3 bundle HEAD^..side &&
+	head -n2 bundle >actual &&
+	echo "# v3 git bundle" >expected &&
+	echo "@object-format=$(test_oid algo)" >>expected &&
+	test_cmp actual expected &&
+	git bundle verify bundle
+'
+
+test_expect_success 'git bundle v3 rejects unknown extensions' '
+	head -n2 bundle >new &&
+	echo "@unknown=silly" >>new &&
+	sed "1,2d" >>new &&
+	test_must_fail git bundle verify new 2>output &&
+	grep "unknown capability .unknown=silly." output
+'
+
 test_done