diff mbox series

[2/3] archive: implement protocol v2 archive command

Message ID 20180912053519.31085-3-steadmon@google.com (mailing list archive)
State Superseded
Headers show
Series [1/3] archive: use packet_reader for communications | expand

Commit Message

Josh Steadmon Sept. 12, 2018, 5:35 a.m. UTC
This adds a new archive command for protocol v2. The command expects
arguments in the form "argument X" which are passed unmodified to
git-upload-archive--writer.

This command works over the file://, Git, and SSH transports. HTTP
support will be added in a separate patch.

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 builtin/archive.c        | 45 +++++++++++++++++++++++++++-------------
 builtin/upload-archive.c | 44 ++++++++++++++++++++++++++++++++++++---
 t/t5000-tar-tree.sh      |  5 +++++
 3 files changed, 77 insertions(+), 17 deletions(-)

Comments

Stefan Beller Sept. 12, 2018, 10:28 p.m. UTC | #1
On Tue, Sep 11, 2018 at 10:36 PM Josh Steadmon <steadmon@google.com> wrote:
>
> This adds a new archive command for protocol v2. The command expects
> arguments in the form "argument X" which are passed unmodified to
> git-upload-archive--writer.
>
> This command works over the file://, Git, and SSH transports. HTTP
> support will be added in a separate patch.
>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  builtin/archive.c        | 45 +++++++++++++++++++++++++++-------------
>  builtin/upload-archive.c | 44 ++++++++++++++++++++++++++++++++++++---
>  t/t5000-tar-tree.sh      |  5 +++++
>  3 files changed, 77 insertions(+), 17 deletions(-)
>
> diff --git a/builtin/archive.c b/builtin/archive.c
> index e54fc39ad..73831887d 100644
> --- a/builtin/archive.c
> +++ b/builtin/archive.c
> @@ -5,9 +5,11 @@
>  #include "cache.h"
>  #include "builtin.h"
>  #include "archive.h"
> +#include "connect.h"
>  #include "transport.h"
>  #include "parse-options.h"
>  #include "pkt-line.h"
> +#include "protocol.h"
>  #include "sideband.h"
>
>  static void create_output_file(const char *output_file)
> @@ -23,6 +25,13 @@ static void create_output_file(const char *output_file)
>         }
>  }
>
> +static int do_v2_command_and_cap(int out)
> +{
> +       packet_write_fmt(out, "command=archive\n");
> +       /* Capability list would go here, if we had any. */
> +       packet_delim(out);
> +}
> +
>  static int run_remote_archiver(int argc, const char **argv,
>                                const char *remote, const char *exec,
>                                const char *name_hint)
> @@ -32,6 +41,7 @@ static int run_remote_archiver(int argc, const char **argv,
>         struct remote *_remote;
>         struct packet_reader reader;
>         enum packet_read_status status;
> +       enum protocol_version version;
>
>         _remote = remote_get(remote);
>         if (!_remote->url[0])
> @@ -41,6 +51,11 @@ static int run_remote_archiver(int argc, const char **argv,
>
>         packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE);
>
> +       version = discover_version(&reader);
> +
> +       if (version == protocol_v2)
> +               do_v2_command_and_cap(fd[1]);
> +
>         /*
>          * Inject a fake --format field at the beginning of the
>          * arguments, with the format inferred from our output
> @@ -56,22 +71,24 @@ static int run_remote_archiver(int argc, const char **argv,
>                 packet_write_fmt(fd[1], "argument %s\n", argv[i]);
>         packet_flush(fd[1]);
>
> -       status = packet_reader_read(&reader);
> -
> -       if (status == PACKET_READ_FLUSH)
> -               die(_("git archive: expected ACK/NAK, got a flush packet"));
> -       if (strcmp(reader.buffer, "ACK")) {
> -               if (starts_with(reader.buffer, "NACK "))
> -                       die(_("git archive: NACK %s"), reader.buffer + 5);
> -               if (starts_with(reader.buffer, "ERR "))
> -                       die(_("remote error: %s"), reader.buffer + 4);
> -               die(_("git archive: protocol error"));

Maybe we also want to support v1
(which is v0 prefixed with one pkt_line saying it is v1).

    if (version == protocol_v1)
        /* drop version v1 line, and then follow v0 logic. */
        packet_reader_read(&reader);

Do we care about v1, or do we just ignore it here? why?
(Don't answer me here, but rather put it in the commit message)

> +       if (version == protocol_v0) {
> +               status = packet_reader_read(&reader);
> +
> +               if (status == PACKET_READ_FLUSH)
> +                       die(_("git archive: expected ACK/NAK, got a flush packet"));
> +               if (strcmp(reader.buffer, "ACK")) {
> +                       if (starts_with(reader.buffer, "NACK "))
> +                               die(_("git archive: NACK %s"), reader.buffer + 5);
> +                       if (starts_with(reader.buffer, "ERR "))
> +                               die(_("remote error: %s"), reader.buffer + 4);
> +                       die(_("git archive: protocol error"));
> +               }
> +
> +               status = packet_reader_read(&reader);
> +               if (status != PACKET_READ_FLUSH)
> +                       die(_("git archive: expected a flush"));
>         }
>
> -       status = packet_reader_read(&reader);
> -       if (status != PACKET_READ_FLUSH)
> -               die(_("git archive: expected a flush"));
> -
>         /* Now, start reading from fd[0] and spit it out to stdout */
>         rv = recv_sideband("archive", fd[0], 1);
>         rv |= transport_disconnect(transport);
> diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c
> index 25d911635..534e8fd56 100644
> --- a/builtin/upload-archive.c
> +++ b/builtin/upload-archive.c
> @@ -5,6 +5,7 @@
>  #include "builtin.h"
>  #include "archive.h"
>  #include "pkt-line.h"
> +#include "protocol.h"
>  #include "sideband.h"
>  #include "run-command.h"
>  #include "argv-array.h"
> @@ -73,13 +74,53 @@ static ssize_t process_input(int child_fd, int band)
>         return sz;
>  }
>
> +static int handle_v2_command_and_cap(void)
> +{
> +       struct packet_reader reader;
> +       enum packet_read_status status;
> +
> +       packet_reader_init(&reader, 0, NULL, 0, PACKET_READ_CHOMP_NEWLINE);
> +
> +       packet_write_fmt(1, "version 2\n");
> +       /*
> +        * We don't currently send any capabilities, but maybe we could list
> +        * supported archival formats?
> +        */
> +       packet_flush(1);
> +
> +       status = packet_reader_read(&reader);
> +       if (status != PACKET_READ_NORMAL ||
> +           strcmp(reader.buffer, "command=archive"))
> +               die(_("upload-archive: expected command=archive"));
> +       while (status == PACKET_READ_NORMAL) {
> +               /* We don't currently expect any client capabilities, but we
> +                * should still read (and ignore) any that happen to get sent.

/*
 * Makes sense to ignore the client capabilities here,
 * but multi-line comments put their opening and closing
 * markers on lines of their own, just like the one above.
 */

> +                */
> +               status = packet_reader_read(&reader);
> +       }
> +       if (status != PACKET_READ_DELIM)
> +               die(_("upload-archive: expected delim packet"));

This is upload-archive, which is a low level plumbing command
(see the main man page of git for an explanation of that category),
so we do not translate the error/die() calls. Besides, this is executed
on the server, which might have a different locale than the requesting
client?

Would asking for a setlocale() on the server side be an unreasonable
feature request for the capabilities (in a follow up patch, and then not
just for archive but also fetch/push, etc.)?

>  int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>  {
>         struct child_process writer = { argv };
> +       enum protocol_version version = determine_protocol_version_server();
>
>         if (argc == 2 && !strcmp(argv[1], "-h"))
>                 usage(upload_archive_usage);
>
> +       if (version == protocol_v2)
> +               handle_v2_command_and_cap();
> +       else {

So if the client asked for v1, we still fall back to v0 here,
which answers my question above.

> +               packet_write_fmt(1, "ACK\n");
> +               packet_flush(1);
> +       }
> +
>         /*
>          * Set up sideband subprocess.
>          *
> @@ -96,9 +137,6 @@ int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>                 die("upload-archive: %s", strerror(err));
>         }
>
> -       packet_write_fmt(1, "ACK\n");
> -       packet_flush(1);
> -
>         while (1) {
>                 struct pollfd pfd[2];
>
> diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
> index 2a97b27b0..4be74d6e9 100755
> --- a/t/t5000-tar-tree.sh
> +++ b/t/t5000-tar-tree.sh
> @@ -145,6 +145,11 @@ test_expect_success \
>
>  check_tar b
>
> +test_expect_success 'protocol v2 for remote' '
> +       GIT_PROTOCOL="version=2" git archive --remote=. HEAD >v2_remote.tar
> +'
> +check_tar v2_remote

Our current standard is to keep all executions inside
a test_expect_* block, but here it is hard to comply with
that as the check_tar function contains test_expect_*
and calling test_expect_* from within itself doesn't work
with our test suite.

So bonus points for a refactoring to bring t5000 up to
our current standard (cf. t0020 for reasonably new
code, and t2002 for older code, though that only covers
syntax, not functions)

The check itself is just testing that setting GIT_PROTOCOL="version=2"
in the environment also lets you obtain an archive. It doesn't
test if the actual communication *is* v2.
See 5e3548ef161 (fetch: send server options when using
protocol v2, 2018-04-23) for an example how to sniff on the
network traffic in tests, i.e. use GIT_TRACE_PACKET=...
and grep on that?

Thanks,
Stefan
Junio C Hamano Sept. 13, 2018, 4:31 p.m. UTC | #2
Josh Steadmon <steadmon@google.com> writes:

> +static int do_v2_command_and_cap(int out)
> +{
> +	packet_write_fmt(out, "command=archive\n");
> +	/* Capability list would go here, if we had any. */
> +	packet_delim(out);
> +}
> +
>  static int run_remote_archiver(int argc, const char **argv,
>  			       const char *remote, const char *exec,
>  			       const char *name_hint)
> @@ -32,6 +41,7 @@ static int run_remote_archiver(int argc, const char **argv,
>  	struct remote *_remote;
>  	struct packet_reader reader;
>  	enum packet_read_status status;
> +	enum protocol_version version;
>  
>  	_remote = remote_get(remote);
>  	if (!_remote->url[0])
> @@ -41,6 +51,11 @@ static int run_remote_archiver(int argc, const char **argv,
>  
>  	packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE);
>  
> +	version = discover_version(&reader);

The original version of upload-archive that is correctly running on
the other end sends either NACK (unable to spawn) or ACK (ready to
serve) to us without waiting for us to speak first, so peeking that
with this discover_version() is a safe thing to do.

> +	if (version == protocol_v2)
> +		do_v2_command_and_cap(fd[1]);
> +

With proto v2, "server capabilities" have already been collected in
server_capabilities_v2 array in discover_version().  We are to pick
and ask the capabilities in that function and respond.  Right now we
do not need to do much, as we saw that very thin implementation of
that function above.

>  	/*
>  	 * Inject a fake --format field at the beginning of the
>  	 * arguments, with the format inferred from our output

And then after that, both the original and updated protocols let us
send the archive format and arguments (like revs and pathspecs),
followed by a flush packet...

> @@ -56,22 +71,24 @@ static int run_remote_archiver(int argc, const char **argv,
>  		packet_write_fmt(fd[1], "argument %s\n", argv[i]);
>  	packet_flush(fd[1]);

... which is a piece of code shared between the protocol versions
that ends here.

> -	status = packet_reader_read(&reader);
> -
> -	if (status == PACKET_READ_FLUSH)
> -		die(_("git archive: expected ACK/NAK, got a flush packet"));
> -	if (strcmp(reader.buffer, "ACK")) {
> -		if (starts_with(reader.buffer, "NACK "))
> -			die(_("git archive: NACK %s"), reader.buffer + 5);
> -		if (starts_with(reader.buffer, "ERR "))
> -			die(_("remote error: %s"), reader.buffer + 4);
> -		die(_("git archive: protocol error"));
> +	if (version == protocol_v0) {
> +		status = packet_reader_read(&reader);
> +
> +		if (status == PACKET_READ_FLUSH)
> +			die(_("git archive: expected ACK/NAK, got a flush packet"));
> +		if (strcmp(reader.buffer, "ACK")) {
> +			if (starts_with(reader.buffer, "NACK "))
> +				die(_("git archive: NACK %s"), reader.buffer + 5);
> +			if (starts_with(reader.buffer, "ERR "))
> +				die(_("remote error: %s"), reader.buffer + 4);
> +			die(_("git archive: protocol error"));
> +		}
> +
> +		status = packet_reader_read(&reader);
> +		if (status != PACKET_READ_FLUSH)
> +			die(_("git archive: expected a flush"));
>  	}

The original protocol lets upload-archive report failure to spawn
the writer backend process and lets us act on it.  We do not need a
similar support in the updated protocol and instead can jump right
into receiving the archive stream because...?

> -	status = packet_reader_read(&reader);
> -	if (status != PACKET_READ_FLUSH)
> -		die(_("git archive: expected a flush"));
> -

>  	/* Now, start reading from fd[0] and spit it out to stdout */
>  	rv = recv_sideband("archive", fd[0], 1);
>  	rv |= transport_disconnect(transport);



> diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c
> index 25d911635..534e8fd56 100644
> --- a/builtin/upload-archive.c
> +++ b/builtin/upload-archive.c
> @@ -5,6 +5,7 @@
>  #include "builtin.h"
>  #include "archive.h"
>  #include "pkt-line.h"
> +#include "protocol.h"
>  #include "sideband.h"
>  #include "run-command.h"
>  #include "argv-array.h"
> @@ -73,13 +74,53 @@ static ssize_t process_input(int child_fd, int band)
>  	return sz;
>  }
>  
> +static int handle_v2_command_and_cap(void)
> +{
> +	struct packet_reader reader;
> +	enum packet_read_status status;
> +
> +	packet_reader_init(&reader, 0, NULL, 0, PACKET_READ_CHOMP_NEWLINE);
> +
> +	packet_write_fmt(1, "version 2\n");

This lets the discover_version() on the other side notice that we
are speaking version 2.

> +	/*
> +	 * We don't currently send any capabilities, but maybe we could list
> +	 * supported archival formats?
> +	 */
> +	packet_flush(1);

process_capabilities_v2() expects the list of caps ends with a
flush, which is given here.

> +	status = packet_reader_read(&reader);
> +	if (status != PACKET_READ_NORMAL ||
> +	    strcmp(reader.buffer, "command=archive"))
> +		die(_("upload-archive: expected command=archive"));

The other side in do_v2_command_and_cap() would ask command=archive
and that is verified.  _() is unwanted, I suppose, by the way, as
you do not know what language the other side wants anyway.

> +	while (status == PACKET_READ_NORMAL) {
> +		/* We don't currently expect any client capabilities, but we
> +		 * should still read (and ignore) any that happen to get sent.
> +		 */
> +		status = packet_reader_read(&reader);

It is wrong to say we should "ignore".  If you are asked to behave
in a certain way by a capability that is not understood, the other
side expects you to honor that request and you have no idea how to
comply.  At least you should make sure that what is asked is among
the capabilities you offered (or you understand), and you should
error out when you see an unknown one, no?

> +	}
> +	if (status != PACKET_READ_DELIM)
> +		die(_("upload-archive: expected delim packet"));
> +
> +	/* Let git-upload-archive--writer handle the arguments. */

The choice of DELIM here over FLUSH is a bit curious, but it is
consistent between upload-archive and run-remote-archiver.

> +	return 0;
> +}
> +
>  int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>  {
>  	struct child_process writer = { argv };
> +	enum protocol_version version = determine_protocol_version_server();
>  
>  	if (argc == 2 && !strcmp(argv[1], "-h"))
>  		usage(upload_archive_usage);
>  
> +	if (version == protocol_v2)
> +		handle_v2_command_and_cap();
> +	else {
> +		packet_write_fmt(1, "ACK\n");
> +		packet_flush(1);
> +	}
> +

This breaks the original protocol, no?  At this point we haven't
even tried to start the writer process, and letting the other side
go by giving ACK + flush prematurely.  After start_command() fails,
we may say NACK, but the other side is no longer listening to it.

>  	/*
>  	 * Set up sideband subprocess.
>  	 *
> @@ -96,9 +137,6 @@ int cmd_upload_archive(int argc, const char **argv, const char *prefix)
>  		die("upload-archive: %s", strerror(err));
>  	}
>  
> -	packet_write_fmt(1, "ACK\n");
> -	packet_flush(1);
> -
>  	while (1) {
>  		struct pollfd pfd[2];
>  
> diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
> index 2a97b27b0..4be74d6e9 100755
> --- a/t/t5000-tar-tree.sh
> +++ b/t/t5000-tar-tree.sh
> @@ -145,6 +145,11 @@ test_expect_success \
>  
>  check_tar b
>  
> +test_expect_success 'protocol v2 for remote' '
> +	GIT_PROTOCOL="version=2" git archive --remote=. HEAD >v2_remote.tar
> +'
> +check_tar v2_remote
> +
>  test_expect_success 'git archive --prefix=prefix/' '
>  	git archive --prefix=prefix/ HEAD >with_prefix.tar
>  '
Ævar Arnfjörð Bjarmason Sept. 13, 2018, 6:45 p.m. UTC | #3
On Wed, Sep 12 2018, Stefan Beller wrote:

> On Tue, Sep 11, 2018 at 10:36 PM Josh Steadmon <steadmon@google.com> wrote:
>> +                */
>> +               status = packet_reader_read(&reader);
>> +       }
>> +       if (status != PACKET_READ_DELIM)
>> +               die(_("upload-archive: expected delim packet"));
>
> This is upload-archive, which is a low level plumbing command
> (see the main man page of git for an explanation of that category),
> so we do not translate the error/die() calls. Besides, this is executed
> on the server, which might have a different locale than the requesting
> client?
>
> Would asking for a setlocale() on the server side be an unreasonable
> feature request for the capabilities (in a follow up patch, and then not
> just for archive but also fetch/push, etc.)?

This would be very nice to have, but as you suggest in some follow-up
change.

I think though that instead of doing setlocale() it would be better to
pass some flag saying we're operating in a machine-readable mode, and
then we'd (as part of the protocol definition) say we're going to emit
GIT_ERR_UPLOAD_ARCHIVE_EXPECTED_DELIM_PACKET or whatever.

Advantages of doing that over a server-side setlocale():

 1) Purely for translation purposes, users can update to a newer client
    to get new translations, even though they're talking to an old
    server.

 2) Again, only for translation purposes, servers may not have the
    appropriate locales generated and/or linked to libgettext.

 3) Ditto, some clients that aren't git.git may want/need to emit
    different translation messages to their consumers than what we have,
    think some GUI client / Emacs magit etc. whose UI is different from
    ours.

 4) Aside from translation purposes, getting a machine-readable
    "push/pull" etc. mode would be very handy. E.g. now you need to
    parse stderr to see why exactly your push failed (hook denied, or
    non-fast-forward, or non-fast-forward where there was a lock race
    condition? ...).

I also wonder if something like #4 wouldn't complement something like
the proposed structured logging[1]. I.e. even though we'd like to run
git.git, and present exactly the message to the user we do now, we might
want to run in such a machine-readable mode under the hood when talking
to the server so we can log exactly how the push went / how it failed
for the purposes of aggregation.

1. https://public-inbox.org/git/20180713165621.52017-2-git@jeffhostetler.com/
Jonathan Nieder Sept. 14, 2018, 5:39 a.m. UTC | #4
Hi,

Josh Steadmon wrote:

> This adds a new archive command for protocol v2. The command expects
> arguments in the form "argument X" which are passed unmodified to
> git-upload-archive--writer.
>
> This command works over the file://, Git, and SSH transports. HTTP
> support will be added in a separate patch.
>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  builtin/archive.c        | 45 +++++++++++++++++++++++++++-------------
>  builtin/upload-archive.c | 44 ++++++++++++++++++++++++++++++++++++---
>  t/t5000-tar-tree.sh      |  5 +++++
>  3 files changed, 77 insertions(+), 17 deletions(-)

I like the diffstat. :)

Can this include some docs in Documentation/technical/ as well, to
help other implementors to understand the protocol so they can
interoperate?

Thanks,
Jonathan
Jonathan Nieder Sept. 14, 2018, 6:05 a.m. UTC | #5
Ævar Arnfjörð Bjarmason wrote:
> On Wed, Sep 12 2018, Stefan Beller wrote:

>> Would asking for a setlocale() on the server side be an unreasonable
>> feature request for the capabilities (in a follow up patch, and then not
>> just for archive but also fetch/push, etc.)?
>
> This would be very nice to have, but as you suggest in some follow-up
> change.

Indeed, I think we've gone pretty far afield from the goal of this
patch series.

> I think though that instead of doing setlocale() it would be better to
> pass some flag saying we're operating in a machine-readable mode, and
> then we'd (as part of the protocol defintion) say we're going to emit
> GIT_ERR_UPLOAD_ARCHIVE_EXPECTED_DELIM_PACKET or whatever.

I think you're suggesting client-side message generation, and that is
one way to handle internationalization of server output.

The main downside is when the server really does want to provide a
custom error message.  For that, we'd need

 1. To propagate LANG to the server, so it knows what human language
    to generate messages in.

 2. On the server side, to produce messages in that language if
    available, with an appropriate fallback if not.

We've been thinking of doing at least (1) using the same trick as
server-options use (cramming it into client capabilities).

It is difficult to use setlocale for this because it affects the whole
program (problematic for a threaded server) and affects features like
collation order instead of just message generation (problematic for
many things).  Does gettext have a variant that takes a locale_t
argument?

[...]
>  4) Aside from translation purposes, getting a machine-readable
>     "push/pull" etc. mode would be very handy. E.g. now you need to
>     parse stderr to see why exactly your push failed (hook denied, or
>     non-fast-forward, or non-fast-forward where there was a lock race
>     condition? ...).

Indeed, this is a good reason to provide error codes instead of (in
the case where the message doesn't add anything to it) or alongside
(in case the error message is more specialized) human-oriented error
messages.

Thanks,
Jonathan
Ævar Arnfjörð Bjarmason Sept. 14, 2018, 2:31 p.m. UTC | #6
On Fri, Sep 14 2018, Jonathan Nieder wrote:

> Ævar Arnfjörð Bjarmason wrote:
>> On Wed, Sep 12 2018, Stefan Beller wrote:
>
>>> Would asking for a setlocale() on the server side be an unreasonable
>>> feature request for the capabilities (in a follow up patch, and then not
>>> just for archive but also fetch/push, etc.)?
>>
>> This would be very nice to have, but as you suggest in some follow-up
>> change.
>
> Indeed, I think we've gone pretty far afield from the goal of this
> patch series.
>
>> I think though that instead of doing setlocale() it would be better to
>> pass some flag saying we're operating in a machine-readable mode, and
>> then we'd (as part of the protocol defintion) say we're going to emit
>> GIT_ERR_UPLOAD_ARCHIVE_EXPECTED_DELIM_PACKET or whatever.
>
> I think you're suggesting client-side message generation, and that is
> one way to handle internationalization of server output.
>
> The main downside is when the server really does want to provide a
> custom error message.  For that, we'd need

Yeah you can't do it for everything. E.g. hooks will want to spew out
custom messages, and this hypothetical protocol extension won't have
codes for those. So you'll still need to pass $LANG along.

>  1. To propagate LANG to the server, so it knows what human language
>     to generate messages in.
>
>  2. On the server side, to produce messages in that language if
>     available, with an appropriate fallback if not.
>
> We've been thinking of doing at least (1) using the same trick as
> server-options use (cramming it into client capabilities).
>
> It is difficult to use setlocale for this because it affects the whole
> program (problematic for a threaded server) and affects features like
> collation order instead of just message generation (problematic for
> many things).  Does gettext have a variant that takes a locale_t
> argument?

No, its API is fairly crappy and depends on setlocale().

Keep in mind though that we're not tied to that API. E.g. one way to
work around this problem is to simply loop through all the languages we
have translations for at server startup, for each one call setlocale()
and gettext(), and save the result in a hash table for runtime lookup,
then you'd just call sprintf(hash[language][message_id], ...) at
runtime.

That's all libintl is really doing under the hood, in a roundabout way
where calls to setlocale() determine what table we're looking things up
in.

The reason I opted to go for gettext to begin with was mainly a) it was
there b) the ubiquitous availability of tooling for translators when it
comes to the *.po files.

But the API for looking things up at runtime is fairly small, and easy
to replace. We could e.g. replace all of our own gettext.[ch] wrapper
with something that works somewhat like what I described above, with
some extra build step to extract the relevant data from the *.mo files
(or parse the *.po directly).

> [...]
>>  4) Aside from translation purposes, getting a machine-readable
>>     "push/pull" etc. mode would be very handy. E.g. now you need to
>>     parse stderr to see why exactly your push failed (hook denied, or
>>     non-fast-forward, or non-fast-forward where there was a lock race
>>     condition? ...).
>
> Indeed, this is a good reason to provide error codes instead of (in
> the case where the message doesn't add anything to it) or alongside
> (in case the error message is more specialized) human-oriented error
> messages.
Junio C Hamano Sept. 14, 2018, 4:14 p.m. UTC | #7
Jonathan Nieder <jrnieder@gmail.com> writes:

>> I think though that instead of doing setlocale() it would be better to
>> pass some flag saying we're operating in a machine-readable mode, and
>> then we'd (as part of the protocol defintion) say we're going to emit
>> GIT_ERR_UPLOAD_ARCHIVE_EXPECTED_DELIM_PACKET or whatever.
>
> I think you're suggesting client-side message generation, and that is
> one way to handle internationalization of server output.
>
> The main downside is when the server really does want to provide a
> custom error message.  For that, we'd need
>
>  1. To propagate LANG to the server, so it knows what human language
>     to generate messages in.
>
>  2. On the server side, to produce messages in that language if
>     available, with an appropriate fallback if not.

That is one way to do so, but it does not have to be the only way, I
would think.  You can send a machine parsable message in pieces, and
assemble the parts of speech into a message at the receiving end.
Like sending a msgid to identify an entry in the .pot file, and
values to be filled in.
Jonathan Nieder Sept. 14, 2018, 4:19 p.m. UTC | #8
Junio C Hamano wrote:
> Jonathan Nieder <jrnieder@gmail.com> writes:

>>> I think though that instead of doing setlocale() it would be better to
>>> pass some flag saying we're operating in a machine-readable mode, and
>>> then we'd (as part of the protocol defintion) say we're going to emit
>>> GIT_ERR_UPLOAD_ARCHIVE_EXPECTED_DELIM_PACKET or whatever.
>>
>> I think you're suggesting client-side message generation, and that is
>> one way to handle internationalization of server output.
>>
>> The main downside is when the server really does want to provide a
>> custom error message.  For that, we'd need
>>
>>  1. To propagate LANG to the server, so it knows what human language
>>     to generate messages in.
>>
>>  2. On the server side, to produce messages in that language if
>>     available, with an appropriate fallback if not.
>
> That is one way to do so, but it does not have to be the only way, I
> would think.  You can send a machine parsable message in pieces, and
> assemble the parts of speech into a message at the receiving end.
> Like sending a msgid to identify an entry in the .pot file, and
> values to be filled in.

That works if the same party controls the client and server and the
client is up to date enough to know about every message the server
would want to send.

It doesn't work for
- hooks
- alternate server implementations
- messages involved in an emergency fix
- ... etc ...

Don't get me wrong: for messages with a machine as an audience, error
codes or similar structured errors are a great way to go, and getting
client-side generation of messages for humans (not to mention styling,
etc) are a nice bonus there.  I stand by what's in the message you're
replying to, though: if we actually want to be able to consistently
provide useful messages to people who do not like to read English,
then client-side generation won't get us all the way there.

Jonathan
diff mbox series

Patch

diff --git a/builtin/archive.c b/builtin/archive.c
index e54fc39ad..73831887d 100644
--- a/builtin/archive.c
+++ b/builtin/archive.c
@@ -5,9 +5,11 @@ 
 #include "cache.h"
 #include "builtin.h"
 #include "archive.h"
+#include "connect.h"
 #include "transport.h"
 #include "parse-options.h"
 #include "pkt-line.h"
+#include "protocol.h"
 #include "sideband.h"
 
 static void create_output_file(const char *output_file)
@@ -23,6 +25,13 @@  static void create_output_file(const char *output_file)
 	}
 }
 
+static int do_v2_command_and_cap(int out)
+{
+	packet_write_fmt(out, "command=archive\n");
+	/* Capability list would go here, if we had any. */
+	packet_delim(out);
+}
+
 static int run_remote_archiver(int argc, const char **argv,
 			       const char *remote, const char *exec,
 			       const char *name_hint)
@@ -32,6 +41,7 @@  static int run_remote_archiver(int argc, const char **argv,
 	struct remote *_remote;
 	struct packet_reader reader;
 	enum packet_read_status status;
+	enum protocol_version version;
 
 	_remote = remote_get(remote);
 	if (!_remote->url[0])
@@ -41,6 +51,11 @@  static int run_remote_archiver(int argc, const char **argv,
 
 	packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE);
 
+	version = discover_version(&reader);
+
+	if (version == protocol_v2)
+		do_v2_command_and_cap(fd[1]);
+
 	/*
 	 * Inject a fake --format field at the beginning of the
 	 * arguments, with the format inferred from our output
@@ -56,22 +71,24 @@  static int run_remote_archiver(int argc, const char **argv,
 		packet_write_fmt(fd[1], "argument %s\n", argv[i]);
 	packet_flush(fd[1]);
 
-	status = packet_reader_read(&reader);
-
-	if (status == PACKET_READ_FLUSH)
-		die(_("git archive: expected ACK/NAK, got a flush packet"));
-	if (strcmp(reader.buffer, "ACK")) {
-		if (starts_with(reader.buffer, "NACK "))
-			die(_("git archive: NACK %s"), reader.buffer + 5);
-		if (starts_with(reader.buffer, "ERR "))
-			die(_("remote error: %s"), reader.buffer + 4);
-		die(_("git archive: protocol error"));
+	if (version == protocol_v0) {
+		status = packet_reader_read(&reader);
+
+		if (status == PACKET_READ_FLUSH)
+			die(_("git archive: expected ACK/NAK, got a flush packet"));
+		if (strcmp(reader.buffer, "ACK")) {
+			if (starts_with(reader.buffer, "NACK "))
+				die(_("git archive: NACK %s"), reader.buffer + 5);
+			if (starts_with(reader.buffer, "ERR "))
+				die(_("remote error: %s"), reader.buffer + 4);
+			die(_("git archive: protocol error"));
+		}
+
+		status = packet_reader_read(&reader);
+		if (status != PACKET_READ_FLUSH)
+			die(_("git archive: expected a flush"));
 	}
 
-	status = packet_reader_read(&reader);
-	if (status != PACKET_READ_FLUSH)
-		die(_("git archive: expected a flush"));
-
 	/* Now, start reading from fd[0] and spit it out to stdout */
 	rv = recv_sideband("archive", fd[0], 1);
 	rv |= transport_disconnect(transport);
diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c
index 25d911635..534e8fd56 100644
--- a/builtin/upload-archive.c
+++ b/builtin/upload-archive.c
@@ -5,6 +5,7 @@ 
 #include "builtin.h"
 #include "archive.h"
 #include "pkt-line.h"
+#include "protocol.h"
 #include "sideband.h"
 #include "run-command.h"
 #include "argv-array.h"
@@ -73,13 +74,53 @@  static ssize_t process_input(int child_fd, int band)
 	return sz;
 }
 
+static int handle_v2_command_and_cap(void)
+{
+	struct packet_reader reader;
+	enum packet_read_status status;
+
+	packet_reader_init(&reader, 0, NULL, 0, PACKET_READ_CHOMP_NEWLINE);
+
+	packet_write_fmt(1, "version 2\n");
+	/*
+	 * We don't currently send any capabilities, but maybe we could list
+	 * supported archival formats?
+	 */
+	packet_flush(1);
+
+	status = packet_reader_read(&reader);
+	if (status != PACKET_READ_NORMAL ||
+	    strcmp(reader.buffer, "command=archive"))
+		die(_("upload-archive: expected command=archive"));
+	while (status == PACKET_READ_NORMAL) {
+		/* We don't currently expect any client capabilities, but we
+		 * should still read (and ignore) any that happen to get sent.
+		 */
+		status = packet_reader_read(&reader);
+	}
+	if (status != PACKET_READ_DELIM)
+		die(_("upload-archive: expected delim packet"));
+
+	/* Let git-upload-archive--writer handle the arguments. */
+
+	return 0;
+}
+
 int cmd_upload_archive(int argc, const char **argv, const char *prefix)
 {
 	struct child_process writer = { argv };
+	enum protocol_version version = determine_protocol_version_server();
 
 	if (argc == 2 && !strcmp(argv[1], "-h"))
 		usage(upload_archive_usage);
 
+	if (version == protocol_v2)
+		handle_v2_command_and_cap();
+	else {
+		packet_write_fmt(1, "ACK\n");
+		packet_flush(1);
+	}
+
 	/*
 	 * Set up sideband subprocess.
 	 *
@@ -96,9 +137,6 @@  int cmd_upload_archive(int argc, const char **argv, const char *prefix)
 		die("upload-archive: %s", strerror(err));
 	}
 
-	packet_write_fmt(1, "ACK\n");
-	packet_flush(1);
-
 	while (1) {
 		struct pollfd pfd[2];
 
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 2a97b27b0..4be74d6e9 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -145,6 +145,11 @@  test_expect_success \
 
 check_tar b
 
+test_expect_success 'protocol v2 for remote' '
+	GIT_PROTOCOL="version=2" git archive --remote=. HEAD >v2_remote.tar
+'
+check_tar v2_remote
+
 test_expect_success 'git archive --prefix=prefix/' '
 	git archive --prefix=prefix/ HEAD >with_prefix.tar
 '