diff mbox series

[2/4] http-fetch: allow custom index-pack args

Message ID 3032117ee747e72f6208eab7f960737d25f8a82a.1611455251.git.jonathantanmy@google.com (mailing list archive)
State New
Headers show
Series Check .gitmodules when using packfile URIs | expand

Commit Message

Jonathan Tan Jan. 24, 2021, 2:34 a.m. UTC
This is the next step in teaching fetch-pack to pass its index-pack
arguments when processing packfiles referenced by URIs.

The "--keep" in fetch-pack.c will be replaced with a full message in a
subsequent commit.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/git-http-fetch.txt |  9 ++++++--
 fetch-pack.c                     |  1 +
 http-fetch.c                     | 35 +++++++++++++++++++++++++++-----
 t/t5550-http-fetch-dumb.sh       |  3 ++-
 4 files changed, 40 insertions(+), 8 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Jan. 24, 2021, 11:52 a.m. UTC | #1
On Sun, Jan 24 2021, Jonathan Tan wrote:

>  --packfile=<hash>::
> -	Instead of a commit id on the command line (which is not expected in
> +	For internal use only. Instead of a commit id on the command line (which is not expected in

Leaves the rest at ~79 and this long line at ~100. Perhaps a follow-up
change to re-word-wrap would be in order?
Jonathan Tan Jan. 28, 2021, 12:32 a.m. UTC | #2
> On Sun, Jan 24 2021, Jonathan Tan wrote:
> 
> >  --packfile=<hash>::
> > -	Instead of a commit id on the command line (which is not expected in
> > +	For internal use only. Instead of a commit id on the command line (which is not expected in
> 
> Leaves the rest at ~79 and this long line at ~100. Perhaps a follow-up
> change to re-word-wrap would be in order?

Hmm...I'll split that onto two lines then. I don't think it's worth the
extra commit in history to have it exactly wrapped right, so I'll forgo
the follow-up change for now.
Josh Steadmon Feb. 16, 2021, 8:49 p.m. UTC | #3
On 2021.01.23 18:34, Jonathan Tan wrote:
> This is the next step in teaching fetch-pack to pass its index-pack
> arguments when processing packfiles referenced by URIs.
> 
> The "--keep" in fetch-pack.c will be replaced with a full message in a
> subsequent commit.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  Documentation/git-http-fetch.txt |  9 ++++++--
>  fetch-pack.c                     |  1 +
>  http-fetch.c                     | 35 +++++++++++++++++++++++++++-----
>  t/t5550-http-fetch-dumb.sh       |  3 ++-
>  4 files changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt
> index 4deb4893f5..aa171088e8 100644
> --- a/Documentation/git-http-fetch.txt
> +++ b/Documentation/git-http-fetch.txt
> @@ -41,11 +41,16 @@ commit-id::
>  		<commit-id>['\t'<filename-as-in--w>]
>  
>  --packfile=<hash>::
> -	Instead of a commit id on the command line (which is not expected in
> +	For internal use only. Instead of a commit id on the command line (which is not expected in
>  	this case), 'git http-fetch' fetches the packfile directly at the given
>  	URL and uses index-pack to generate corresponding .idx and .keep files.
>  	The hash is used to determine the name of the temporary file and is
> -	arbitrary. The output of index-pack is printed to stdout.
> +	arbitrary. The output of index-pack is printed to stdout. Requires
> +	--index-pack-args.
> +
> +--index-pack-args=<args>::
> +	For internal use only. The command to run on the contents of the
> +	downloaded pack. Arguments are URL-encoded separated by spaces.

I'm a bit skeptical of using URL encoding to work around embedded
spaces. I believe in Emily's config-based hooks series, she wrote an
argument parser to pull repeated arguments into a strvec, could you do
something like that here?

I'm sympathetic to the idea that since this is an internal-only flag, we
can be a bit weird with the argument format, though.

>  --recover::
>  	Verify that everything reachable from target is fetched.  Used after
> diff --git a/fetch-pack.c b/fetch-pack.c
> index 876f90c759..274ae602f7 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1645,6 +1645,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  		strvec_pushf(&cmd.args, "--packfile=%.*s",
>  			     (int) the_hash_algo->hexsz,
>  			     packfile_uris.items[i].string);
> +		strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep");
>  		strvec_push(&cmd.args, uri);
>  		cmd.git_cmd = 1;
>  		cmd.no_stdin = 1;
> diff --git a/http-fetch.c b/http-fetch.c
> index 2d1d9d054f..12feb84e71 100644
> --- a/http-fetch.c
> +++ b/http-fetch.c
> @@ -3,6 +3,7 @@
>  #include "exec-cmd.h"
>  #include "http.h"
>  #include "walker.h"
> +#include "strvec.h"
>  
>  static const char http_fetch_usage[] = "git http-fetch "
>  "[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
> @@ -43,11 +44,9 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
>  	return rc;
>  }
>  
> -static const char *index_pack_args[] =
> -	{"index-pack", "--stdin", "--keep", NULL};
> -
>  static void fetch_single_packfile(struct object_id *packfile_hash,
> -				  const char *url) {
> +				  const char *url,
> +				  const char **index_pack_args) {
>  	struct http_pack_request *preq;
>  	struct slot_results results;
>  	int ret;
> @@ -90,6 +89,7 @@ int cmd_main(int argc, const char **argv)
>  	int packfile = 0;
>  	int nongit;
>  	struct object_id packfile_hash;
> +	const char *index_pack_args = NULL;
>  
>  	setup_git_directory_gently(&nongit);
>  
> @@ -116,6 +116,8 @@ int cmd_main(int argc, const char **argv)
>  			packfile = 1;
>  			if (parse_oid_hex(p, &packfile_hash, &end) || *end)
>  				die(_("argument to --packfile must be a valid hash (got '%s')"), p);
> +		} else if (skip_prefix(argv[arg], "--index-pack-args=", &p)) {
> +			index_pack_args = p;
>  		}
>  		arg++;
>  	}
> @@ -128,10 +130,33 @@ int cmd_main(int argc, const char **argv)
>  	git_config(git_default_config, NULL);
>  
>  	if (packfile) {
> -		fetch_single_packfile(&packfile_hash, argv[arg]);
> +		struct strvec encoded = STRVEC_INIT;
> +		char **raw;
> +		int i;
> +
> +		if (!index_pack_args)
> +			die(_("--packfile requires --index-pack-args"));
> +
> +		strvec_split(&encoded, index_pack_args);
> +
> +		CALLOC_ARRAY(raw, encoded.nr + 1);
> +		for (i = 0; i < encoded.nr; i++)
> +			raw[i] = url_percent_decode(encoded.v[i]);
> +
> +		fetch_single_packfile(&packfile_hash, argv[arg],
> +				      (const char **) raw);
> +
> +		for (i = 0; i < encoded.nr; i++)
> +			free(raw[i]);
> +		free(raw);
> +		strvec_clear(&encoded);
> +
>  		return 0;
>  	}
>  
> +	if (index_pack_args)
> +		die(_("--index-pack-args can only be used with --packfile"));
> +
>  	if (commits_on_stdin) {
>  		commits = walker_targets_stdin(&commit_id, &write_ref);
>  	} else {
> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index 483578b2d7..af90e7efed 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -224,7 +224,8 @@ test_expect_success 'http-fetch --packfile' '
>  
>  	git init packfileclient &&
>  	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
> -	git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
> +	git -C packfileclient http-fetch --packfile=$ARBITRARY \
> +		--index-pack-args="index-pack --stdin --keep" "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
>  
>  	grep "^keep.[0-9a-f]\{16,\}$" out &&
>  	cut -c6- out >packhash &&
> -- 
> 2.30.0.280.ga3ce27912f-goog
>
Junio C Hamano Feb. 16, 2021, 10:57 p.m. UTC | #4
Josh Steadmon <steadmon@google.com> writes:

>> +--index-pack-args=<args>::
>> +	For internal use only. The command to run on the contents of the
>> +	downloaded pack. Arguments are URL-encoded separated by spaces.
>
> I'm a bit skeptical of using URL encoding to work around embedded
> spaces. I believe in Emily's config-based hooks series, she wrote an
> argument parser to pull repeated arguments into a strvec, could you do
> something like that here?
>
> I'm sympathetic to the idea that since this is an internal-only flag, we
> can be a bit weird with the argument format, though.

We tend to prefer quote.c::sq_quote*() suite of quoting; does this
codepath have very different constraints that require different
encoding?

Thanks.
Jonathan Tan Feb. 17, 2021, 7:46 p.m. UTC | #5
> Josh Steadmon <steadmon@google.com> writes:
> 
> >> +--index-pack-args=<args>::
> >> +	For internal use only. The command to run on the contents of the
> >> +	downloaded pack. Arguments are URL-encoded separated by spaces.
> >
> > I'm a bit skeptical of using URL encoding to work around embedded
> > spaces. I believe in Emily's config-based hooks series, she wrote an
> > argument parser to pull repeated arguments into a strvec, could you do
> > something like that here?
> >
> > I'm sympathetic to the idea that since this is an internal-only flag, we
> > can be a bit weird with the argument format, though.
> 
> We tend to prefer quote.c::sq_quote*() suite of quoting; does this
> codepath have very different constraints that require different
> encoding?

My main issue was that I needed to join arbitrary strings and then split
them, which is why I URL-encoded them (so that they would no longer
contain spaces) and then used spaces as the "join" separator. With
Josh's suggestion, I wouldn't need any sort of encoding or quoting, so I
think I'll use that.
diff mbox series

Patch

diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt
index 4deb4893f5..aa171088e8 100644
--- a/Documentation/git-http-fetch.txt
+++ b/Documentation/git-http-fetch.txt
@@ -41,11 +41,16 @@  commit-id::
 		<commit-id>['\t'<filename-as-in--w>]
 
 --packfile=<hash>::
-	Instead of a commit id on the command line (which is not expected in
+	For internal use only. Instead of a commit id on the command line (which is not expected in
 	this case), 'git http-fetch' fetches the packfile directly at the given
 	URL and uses index-pack to generate corresponding .idx and .keep files.
 	The hash is used to determine the name of the temporary file and is
-	arbitrary. The output of index-pack is printed to stdout.
+	arbitrary. The output of index-pack is printed to stdout. Requires
+	--index-pack-args.
+
+--index-pack-args=<args>::
+	For internal use only. The command to run on the contents of the
+	downloaded pack. Arguments are URL-encoded and separated by spaces.
 
 --recover::
 	Verify that everything reachable from target is fetched.  Used after
diff --git a/fetch-pack.c b/fetch-pack.c
index 876f90c759..274ae602f7 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1645,6 +1645,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		strvec_pushf(&cmd.args, "--packfile=%.*s",
 			     (int) the_hash_algo->hexsz,
 			     packfile_uris.items[i].string);
+		strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep");
 		strvec_push(&cmd.args, uri);
 		cmd.git_cmd = 1;
 		cmd.no_stdin = 1;
diff --git a/http-fetch.c b/http-fetch.c
index 2d1d9d054f..12feb84e71 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -3,6 +3,7 @@ 
 #include "exec-cmd.h"
 #include "http.h"
 #include "walker.h"
+#include "strvec.h"
 
 static const char http_fetch_usage[] = "git http-fetch "
 "[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
@@ -43,11 +44,9 @@  static int fetch_using_walker(const char *raw_url, int get_verbosely,
 	return rc;
 }
 
-static const char *index_pack_args[] =
-	{"index-pack", "--stdin", "--keep", NULL};
-
 static void fetch_single_packfile(struct object_id *packfile_hash,
-				  const char *url) {
+				  const char *url,
+				  const char **index_pack_args) {
 	struct http_pack_request *preq;
 	struct slot_results results;
 	int ret;
@@ -90,6 +89,7 @@  int cmd_main(int argc, const char **argv)
 	int packfile = 0;
 	int nongit;
 	struct object_id packfile_hash;
+	const char *index_pack_args = NULL;
 
 	setup_git_directory_gently(&nongit);
 
@@ -116,6 +116,8 @@  int cmd_main(int argc, const char **argv)
 			packfile = 1;
 			if (parse_oid_hex(p, &packfile_hash, &end) || *end)
 				die(_("argument to --packfile must be a valid hash (got '%s')"), p);
+		} else if (skip_prefix(argv[arg], "--index-pack-args=", &p)) {
+			index_pack_args = p;
 		}
 		arg++;
 	}
@@ -128,10 +130,33 @@  int cmd_main(int argc, const char **argv)
 	git_config(git_default_config, NULL);
 
 	if (packfile) {
-		fetch_single_packfile(&packfile_hash, argv[arg]);
+		struct strvec encoded = STRVEC_INIT;
+		char **raw;
+		int i;
+
+		if (!index_pack_args)
+			die(_("--packfile requires --index-pack-args"));
+
+		strvec_split(&encoded, index_pack_args);
+
+		CALLOC_ARRAY(raw, encoded.nr + 1);
+		for (i = 0; i < encoded.nr; i++)
+			raw[i] = url_percent_decode(encoded.v[i]);
+
+		fetch_single_packfile(&packfile_hash, argv[arg],
+				      (const char **) raw);
+
+		for (i = 0; i < encoded.nr; i++)
+			free(raw[i]);
+		free(raw);
+		strvec_clear(&encoded);
+
 		return 0;
 	}
 
+	if (index_pack_args)
+		die(_("--index-pack-args can only be used with --packfile"));
+
 	if (commits_on_stdin) {
 		commits = walker_targets_stdin(&commit_id, &write_ref);
 	} else {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 483578b2d7..af90e7efed 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -224,7 +224,8 @@  test_expect_success 'http-fetch --packfile' '
 
 	git init packfileclient &&
 	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
-	git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
+	git -C packfileclient http-fetch --packfile=$ARBITRARY \
+		--index-pack-args="index-pack --stdin --keep" "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
 
 	grep "^keep.[0-9a-f]\{16,\}$" out &&
 	cut -c6- out >packhash &&