Message ID | 20240628190503.67389-5-eric.peijian@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | cat-file: add remote-object-info to batch-command | expand |
Eric Ju <eric.peijian@gmail.com> writes: > diff --git a/transport.c b/transport.c > index 83ddea8fbc..2847aa3f3c 100644 > --- a/transport.c > +++ b/transport.c > @@ -436,11 +504,27 @@ static int fetch_refs_via_pack(struct transport *transport, > args.server_options = transport->server_options; > args.negotiation_tips = data->options.negotiation_tips; > args.reject_shallow_remote = transport->smart_options->reject_shallow; > - > - if (!data->finished_handshake) { > - int i; > + args.object_info = transport->smart_options->object_info; > + > + if (transport->smart_options && transport->smart_options->object_info) { > + struct ref *ref = object_info_refs; > + > + if (!fetch_object_info(transport, data->options.object_info_data)) > + goto cleanup; > + args.object_info_data = data->options.object_info_data; > + args.quiet = 1; > + args.no_progress = 1; > + for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); Any reason why you're not using the subscript operator (square brackets) like this: + temp_ref->old_oid = transport->smart_options->object_info_oids->oid[i]; > + temp_ref->exact_oid = 1; > + ref->next = temp_ref; > + ref = ref->next; > + } > + transport->remote_refs = object_info_refs->next; I find it a bit weird you're allocating object_info_refs, only to use it to point to the next. 
Can I suggest a little refactor: ----8<-----8<---- diff --git a/transport.c b/transport.c index 662faa004e..56cb3a1693 100644 --- a/transport.c +++ b/transport.c @@ -479,7 +479,7 @@ static int fetch_refs_via_pack(struct transport *transport, struct ref *refs = NULL; struct fetch_pack_args args; struct ref *refs_tmp = NULL; - struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); + struct ref *object_info_refs = NULL; memset(&args, 0, sizeof(args)); args.uploadpack = data->options.uploadpack; @@ -509,7 +509,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.object_info = transport->smart_options->object_info; if (transport->smart_options && transport->smart_options->object_info) { - struct ref *ref = object_info_refs; + struct ref *ref = object_info_refs = xcalloc(1, sizeof (struct ref)); if (!fetch_object_info(transport, data->options.object_info_data)) goto cleanup; @@ -517,13 +517,12 @@ static int fetch_refs_via_pack(struct transport *transport, args.quiet = 1; args.no_progress = 1; for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { - struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); - temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); - temp_ref->exact_oid = 1; - ref->next = temp_ref; + ref->old_oid = transport->smart_options->object_info_oids->oid[i]; + ref->exact_oid = 1; + ref->next = xcalloc(1, sizeof (struct ref)); ref = ref->next; } - transport->remote_refs = object_info_refs->next; + transport->remote_refs = object_info_refs; } else if (!data->finished_handshake) { int must_list_refs = 0; for (int i = 0; i < nr_heads; i++) { @@ -565,7 +564,7 @@ static int fetch_refs_via_pack(struct transport *transport, data->finished_handshake = 0; if (args.object_info) { - struct ref *ref_cpy_reader = object_info_refs->next; + struct ref *ref_cpy_reader = object_info_refs; for (int i = 0; ref_cpy_reader; i++) { oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, 
&(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); ref_cpy_reader = ref_cpy_reader->next; ----8<-----8<---- To be honest, I'm not sure it works, because fetch_object_info() always seems to return a non-zero value. I'm not sure this is due to missing code coverage, or a bug. I guess it's worth looking into.
Toon claes <toon@iotcl.com> writes: >> + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > > Any reason why you're not using the subscript operator (square brackets) > like this: > > + temp_ref->old_oid = transport->smart_options->object_info_oids->oid[i]; Much nicer, but fold such overly long lines, please, temp_ref->old_oid = transport->smart_options-> object_info_oids->oid[i]; to make them readable. > ... > To be honest, I'm not sure it works, because fetch_object_info() always > seem to return a non-zero value. I'm not sure this is due to missing > code coverage, or a bug. I guess it's worth looking into.
Eric Ju <eric.peijian@gmail.com> writes: > From: Calvin Wan <calvinwan@google.com> > > Sometimes it is useful to get information about an object without having > to download it completely. The server logic has already been implemented > as “a2ba162cda (object-info: support for retrieving object info, Nit: s/as/in > 2021-04-20)”. > > Add client functions to communicate with the server. > > The client currently supports requesting a list of object ids with > features 'size' and 'type' from a v2 server. If a server does not But do we support type? I thought we only added support for 'size'. > advertise either of the requested features, then the client falls back > to making the request through 'fetch'. > > Signed-off-by: Calvin Wan <calvinwan@google.com> > Signed-off-by: Eric Ju <eric.peijian@gmail.com> > Helped-by: Jonathan Tan <jonathantanmy@google.com> > Helped-by: Christian Couder <chriscool@tuxfamily.org> > --- > fetch-pack.c | 24 +++++++++++ > fetch-pack.h | 10 +++++ > transport-helper.c | 8 +++- > transport.c | 102 ++++++++++++++++++++++++++++++++++++++++++--- > transport.h | 11 +++++ > 5 files changed, 148 insertions(+), 7 deletions(-) > > diff --git a/fetch-pack.c b/fetch-pack.c > index da0de9c537..d533cac1d8 100644 > --- a/fetch-pack.c > +++ b/fetch-pack.c > @@ -1345,6 +1345,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf, > packet_buf_delim(req_buf); > } > > +void send_object_info_request(int fd_out, struct object_info_args *args) > +{ > + struct strbuf req_buf = STRBUF_INIT; > + > + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); > + > + if (unsorted_string_list_has_string(args->object_info_options, "size")) > + packet_buf_write(&req_buf, "size"); > + > + if (args->oids) { > + for (size_t i = 0; i < args->oids->nr; i++) > + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); > + } > + > + packet_buf_flush(&req_buf); > + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) > + 
die_errno(_("unable to write request to remote")); > + > + strbuf_release(&req_buf); > +} > + > static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, > struct fetch_pack_args *args, > const struct ref *wants, struct oidset *common, > @@ -1682,6 +1703,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, > if (args->depth > 0 || args->deepen_since || args->deepen_not) > args->deepen = 1; > > + if (args->object_info) > + state = FETCH_SEND_REQUEST; > + > while (state != FETCH_DONE) { > switch (state) { > case FETCH_CHECK_LOCAL: > diff --git a/fetch-pack.h b/fetch-pack.h > index 6775d26517..16e4dc0824 100644 > --- a/fetch-pack.h > +++ b/fetch-pack.h > @@ -16,6 +16,7 @@ struct fetch_pack_args { > const struct string_list *deepen_not; > struct list_objects_filter_options filter_options; > const struct string_list *server_options; > + struct object_info **object_info_data; > > /* > * If not NULL, during packfile negotiation, fetch-pack will send "have" > @@ -42,6 +43,7 @@ struct fetch_pack_args { > unsigned reject_shallow_remote:1; > unsigned deepen:1; > unsigned refetch:1; > + unsigned object_info:1; > > /* > * Indicate that the remote of this request is a promisor remote. The > @@ -68,6 +70,12 @@ struct fetch_pack_args { > unsigned connectivity_checked:1; > }; > > +struct object_info_args { > + struct string_list *object_info_options; > + const struct string_list *server_options; > + struct oid_array *oids; > +}; > + > /* > * sought represents remote references that should be updated from. 
> * On return, the names that were found on the remote will have been > @@ -101,4 +109,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, > */ > int report_unmatched_refs(struct ref **sought, int nr_sought); > > +void send_object_info_request(int fd_out, struct object_info_args *args); > + > #endif > diff --git a/transport-helper.c b/transport-helper.c > index 9820947ab2..670d1e7068 100644 > --- a/transport-helper.c > +++ b/transport-helper.c > @@ -697,13 +697,17 @@ static int fetch_refs(struct transport *transport, > > /* > * If we reach here, then the server, the client, and/or the transport > - * helper does not support protocol v2. --negotiate-only requires > - * protocol v2. > + * helper does not support protocol v2. --negotiate-only and cat-file remote-object-info > + * require protocol v2. > */ > if (data->transport_options.acked_commits) { > warning(_("--negotiate-only requires protocol v2")); > return -1; > } > + if (transport->smart_options->object_info) { > + // fail the command explicitly to avoid further commands input > + die(_("remote-object-info requires protocol v2")); > + } > > if (!data->get_refs_list_called) > get_refs_list_using_list(transport, 0); > diff --git a/transport.c b/transport.c > index 83ddea8fbc..2847aa3f3c 100644 > --- a/transport.c > +++ b/transport.c > @@ -363,6 +363,73 @@ static struct ref *handshake(struct transport *transport, int for_push, > return refs; > } > > +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data) > +{ > + int size_index = -1; > + struct git_transport_data *data = transport->data; > + struct object_info_args args; > + struct packet_reader reader; > + > + memset(&args, 0, sizeof(args)); Nit: we could `struct object_info_args args = { 0 };` above instead. 
> + args.server_options = transport->server_options; > + args.object_info_options = transport->smart_options->object_info_options; > + args.oids = transport->smart_options->object_info_oids; > + > + connect_setup(transport, 0); > + packet_reader_init(&reader, data->fd[0], NULL, 0, > + PACKET_READ_CHOMP_NEWLINE | > + PACKET_READ_GENTLE_ON_EOF | > + PACKET_READ_DIE_ON_ERR_PACKET); > + data->version = discover_version(&reader); > + > + transport->hash_algo = reader.hash_algo; > + > + switch (data->version) { > + case protocol_v2: > + if (!server_supports_v2("object-info")) > + return -1; > + if (unsorted_string_list_has_string(args.object_info_options, "size") > + && !server_supports_feature("object-info", "size", 0)) { > + return -1; > + } > + send_object_info_request(data->fd[1], &args); > + break; > + case protocol_v1: > + case protocol_v0: > + die(_("wrong protocol version. expected v2")); > + case protocol_unknown_version: > + BUG("unknown protocol version"); > + } > + > + for (size_t i = 0; i < args.object_info_options->nr; i++) { > + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > + return -1; > + } > + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { > + if (!strcmp(reader.line, "size")) > + size_index = i; > + continue; > + } > + return -1; > + } > + > + for (size_t i = 0; packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr; i++){ > + struct string_list object_info_values = STRING_LIST_INIT_DUP; We need to also call `string_list_clear()` at the end of this block. 
> + > + string_list_split(&object_info_values, reader.line, ' ', -1); > + if (0 <= size_index) { > + if (!strcmp(object_info_values.items[1 + size_index].string, "")) > + die("object-info: not our ref %s", > + object_info_values.items[0].string); > + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); Perhaps `*object_info_data[i]->sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);`? So, this is allocated in 'cat-file' and set here? Wouldn't it be nicer to also do the alloc here? > + } > + } > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > + > + return 0; > +} > + > static struct ref *get_refs_via_connect(struct transport *transport, int for_push, > struct transport_ls_refs_options *options) > { > @@ -410,6 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport, > struct ref *refs = NULL; > struct fetch_pack_args args; > struct ref *refs_tmp = NULL; > + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); > > memset(&args, 0, sizeof(args)); > args.uploadpack = data->options.uploadpack; > @@ -436,11 +504,27 @@ static int fetch_refs_via_pack(struct transport *transport, > args.server_options = transport->server_options; > args.negotiation_tips = data->options.negotiation_tips; > args.reject_shallow_remote = transport->smart_options->reject_shallow; > - > - if (!data->finished_handshake) { > - int i; > + args.object_info = transport->smart_options->object_info; > + > + if (transport->smart_options && transport->smart_options->object_info) { > + struct ref *ref = object_info_refs; > + > + if (!fetch_object_info(transport, data->options.object_info_data)) > + goto cleanup; > + args.object_info_data = data->options.object_info_data; > + args.quiet = 1; > + args.no_progress = 1; > + for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > + 
temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > + temp_ref->exact_oid = 1; > + ref->next = temp_ref; > + ref = ref->next; > + } > + transport->remote_refs = object_info_refs->next; > + } else if (!data->finished_handshake) { > int must_list_refs = 0; > - for (i = 0; i < nr_heads; i++) { > + for (int i = 0; i < nr_heads; i++) { > if (!to_fetch[i]->exact_oid) { > must_list_refs = 1; > break; > @@ -478,11 +562,18 @@ static int fetch_refs_via_pack(struct transport *transport, > &transport->pack_lockfiles, data->version); > > data->finished_handshake = 0; > + if (args.object_info) { > + struct ref *ref_cpy_reader = object_info_refs->next; > + for (int i = 0; ref_cpy_reader; i++) { > + oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); > + ref_cpy_reader = ref_cpy_reader->next; > + } > + } > data->options.self_contained_and_connected = > args.self_contained_and_connected; > data->options.connectivity_checked = args.connectivity_checked; > > - if (!refs) > + if (!refs && !args.object_info) > ret = -1; > if (report_unmatched_refs(to_fetch, nr_heads)) > ret = -1; > @@ -498,6 +589,7 @@ static int fetch_refs_via_pack(struct transport *transport, > free_refs(refs_tmp); > free_refs(refs); > list_objects_filter_release(&args.filter_options); > + free_refs(object_info_refs); Shouldn't we loop through `object_info_refs->next` and free all of them ? > return ret; > } > > diff --git a/transport.h b/transport.h > index 6393cd9823..5a3cda1860 100644 > --- a/transport.h > +++ b/transport.h > @@ -5,6 +5,7 @@ > #include "remote.h" > #include "list-objects-filter-options.h" > #include "string-list.h" > +#include "object-store.h" > > struct git_transport_options { > unsigned thin : 1; > @@ -30,6 +31,12 @@ struct git_transport_options { > */ > unsigned connectivity_checked:1; > > + /* > + * Transport will attempt to pull only object-info. 
Fallbacks > + * to pulling entire object if object-info is not supported. > + */ > + unsigned object_info : 1; > + > int depth; > const char *deepen_since; > const struct string_list *deepen_not; > @@ -53,6 +60,10 @@ struct git_transport_options { > * common commits to this oidset instead of fetching any packfiles. > */ > struct oidset *acked_commits; > + > + struct oid_array *object_info_oids; > + struct object_info **object_info_data; > + struct string_list *object_info_options; > }; > > enum transport_family { > -- > 2.45.2 I'm wondering if we can add tests at this stage.
On Tue, Jul 9, 2024 at 3:16 AM Toon claes <toon@iotcl.com> wrote: > > Eric Ju <eric.peijian@gmail.com> writes: > > > diff --git a/transport.c b/transport.c > > index 83ddea8fbc..2847aa3f3c 100644 > > --- a/transport.c > > +++ b/transport.c > > @@ -436,11 +504,27 @@ static int fetch_refs_via_pack(struct transport *transport, > > args.server_options = transport->server_options; > > args.negotiation_tips = data->options.negotiation_tips; > > args.reject_shallow_remote = transport->smart_options->reject_shallow; > > - > > - if (!data->finished_handshake) { > > - int i; > > + args.object_info = transport->smart_options->object_info; > > + > > + if (transport->smart_options && transport->smart_options->object_info) { > > + struct ref *ref = object_info_refs; > > + > > + if (!fetch_object_info(transport, data->options.object_info_data)) > > + goto cleanup; > > + args.object_info_data = data->options.object_info_data; > > + args.quiet = 1; > > + args.no_progress = 1; > > + for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > > + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > > Any reason why you're not using the subscript operator (square brackets) > like this: > > + temp_ref->old_oid = transport->smart_options->object_info_oids->oid[I]; > Thank you. Fixed in V2. > > + temp_ref->exact_oid = 1; > > + ref->next = temp_ref; > > + ref = ref->next; > > + } > > + transport->remote_refs = object_info_refs->next; > > I find it a bit weird you're allocating object_info_refs, only to use it > to point to the next. Can I suggest a little refactor: > Thank you. I have to agree that the old implementation of iterating on the object_info_refs linked list is a bit obscure. Your suggestion is easier to follow. I am replacing the old logic in V2. 
> ----8<-----8<---- > diff --git a/transport.c b/transport.c > index 662faa004e..56cb3a1693 100644 > --- a/transport.c > +++ b/transport.c > @@ -479,7 +479,7 @@ static int fetch_refs_via_pack(struct transport *transport, > struct ref *refs = NULL; > struct fetch_pack_args args; > struct ref *refs_tmp = NULL; > - struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); > + struct ref *object_info_refs = NULL; > > memset(&args, 0, sizeof(args)); > args.uploadpack = data->options.uploadpack; > @@ -509,7 +509,7 @@ static int fetch_refs_via_pack(struct transport *transport, > args.object_info = transport->smart_options->object_info; > > if (transport->smart_options && transport->smart_options->object_info) { > - struct ref *ref = object_info_refs; > + struct ref *ref = object_info_refs = xcalloc(1, sizeof (struct ref)); > > if (!fetch_object_info(transport, data->options.object_info_data)) > goto cleanup; > @@ -517,13 +517,12 @@ static int fetch_refs_via_pack(struct transport *transport, > args.quiet = 1; > args.no_progress = 1; > for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > - struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > - temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > - temp_ref->exact_oid = 1; > - ref->next = temp_ref; > + ref->old_oid = transport->smart_options->object_info_oids->oid[i]; > + ref->exact_oid = 1; > + ref->next = xcalloc(1, sizeof (struct ref)); > ref = ref->next; > } > - transport->remote_refs = object_info_refs->next; > + transport->remote_refs = object_info_refs; > } else if (!data->finished_handshake) { > int must_list_refs = 0; > for (int i = 0; i < nr_heads; i++) { > @@ -565,7 +564,7 @@ static int fetch_refs_via_pack(struct transport *transport, > > data->finished_handshake = 0; > if (args.object_info) { > - struct ref *ref_cpy_reader = object_info_refs->next; > + struct ref *ref_cpy_reader = object_info_refs; > for (int i = 0; ref_cpy_reader; i++) { > 
oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); > ref_cpy_reader = ref_cpy_reader->next; > ----8<-----8<---- > > To be honest, I'm not sure it works, because fetch_object_info() always > seem to return a non-zero value. I'm not sure this is due to missing > code coverage, or a bug. I guess it's worth looking into. > Thank you. I tested your suggestion and it is working. I confirmed this by doing the following with my debugger: 1. pause on a test case of t/t1017-cat-file-remote-object-info.sh 2. git cat-file "--batch-command=%(objectname) %(objectsize)" 3. remote-object-info http://127.0.0.1:11017/smart/http_parent 5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689 I set breakpoints along the way and saw that fetch_object_info() returned zero. Would you mind sharing your test steps with me? I would love to dig deeper. > -- > Toon
On Tue, Jul 9, 2024 at 12:37 PM Junio C Hamano <gitster@pobox.com> wrote: > > Toon claes <toon@iotcl.com> writes: > > >> + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > > > > Any reason why you're not using the subscript operator (square brackets) > > like this: > > > > + temp_ref->old_oid = transport->smart_options->object_info_oids->oid[i]; > > Much nicer, but fold such overly long lines, please, > > temp_ref->old_oid = transport->smart_options-> > object_info_oids->oid[i]; > > to make them readable. > > Thank you, sir. I will follow the folding format in V2. > > > ... > > To be honest, I'm not sure it works, because fetch_object_info() always > > seem to return a non-zero value. I'm not sure this is due to missing > > code coverage, or a bug. I guess it's worth looking into.
On Wed, Jul 10, 2024 at 6:13 AM Karthik Nayak <karthik.188@gmail.com> wrote: > > Eric Ju <eric.peijian@gmail.com> writes: > > > From: Calvin Wan <calvinwan@google.com> > > > > Sometimes it is useful to get information about an object without having > > to download it completely. The server logic has already been implemented > > as “a2ba162cda (object-info: support for retrieving object info, > > Nit: s/as/in > Thank you. Fixed in V2. > > 2021-04-20)”. > > > > Add client functions to communicate with the server. > > > > The client currently supports requesting a list of object ids with > > features 'size' and 'type' from a v2 server. If a server does not > > But do we support type? I thought we only added support for 'size'. > Thank you. Yes, only size is supported, I will revise it. > > advertise either of the requested features, then the client falls back > > to making the request through 'fetch'. > > > > Signed-off-by: Calvin Wan <calvinwan@google.com> > > Signed-off-by: Eric Ju <eric.peijian@gmail.com> > > Helped-by: Jonathan Tan <jonathantanmy@google.com> > > Helped-by: Christian Couder <chriscool@tuxfamily.org> > > --- > > fetch-pack.c | 24 +++++++++++ > > fetch-pack.h | 10 +++++ > > transport-helper.c | 8 +++- > > transport.c | 102 ++++++++++++++++++++++++++++++++++++++++++--- > > transport.h | 11 +++++ > > 5 files changed, 148 insertions(+), 7 deletions(-) > > > > diff --git a/fetch-pack.c b/fetch-pack.c > > index da0de9c537..d533cac1d8 100644 > > --- a/fetch-pack.c > > +++ b/fetch-pack.c > > @@ -1345,6 +1345,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf, > > packet_buf_delim(req_buf); > > } > > > > +void send_object_info_request(int fd_out, struct object_info_args *args) > > +{ > > + struct strbuf req_buf = STRBUF_INIT; > > + > > + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); > > + > > + if (unsorted_string_list_has_string(args->object_info_options, "size")) > > + packet_buf_write(&req_buf, 
"size"); > > + > > + if (args->oids) { > > + for (size_t i = 0; i < args->oids->nr; i++) > > + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); > > + } > > + > > + packet_buf_flush(&req_buf); > > + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) > > + die_errno(_("unable to write request to remote")); > > + > > + strbuf_release(&req_buf); > > +} > > + > > static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, > > struct fetch_pack_args *args, > > const struct ref *wants, struct oidset *common, > > @@ -1682,6 +1703,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, > > if (args->depth > 0 || args->deepen_since || args->deepen_not) > > args->deepen = 1; > > > > + if (args->object_info) > > + state = FETCH_SEND_REQUEST; > > + > > while (state != FETCH_DONE) { > > switch (state) { > > case FETCH_CHECK_LOCAL: > > diff --git a/fetch-pack.h b/fetch-pack.h > > index 6775d26517..16e4dc0824 100644 > > --- a/fetch-pack.h > > +++ b/fetch-pack.h > > @@ -16,6 +16,7 @@ struct fetch_pack_args { > > const struct string_list *deepen_not; > > struct list_objects_filter_options filter_options; > > const struct string_list *server_options; > > + struct object_info **object_info_data; > > > > /* > > * If not NULL, during packfile negotiation, fetch-pack will send "have" > > @@ -42,6 +43,7 @@ struct fetch_pack_args { > > unsigned reject_shallow_remote:1; > > unsigned deepen:1; > > unsigned refetch:1; > > + unsigned object_info:1; > > > > /* > > * Indicate that the remote of this request is a promisor remote. The > > @@ -68,6 +70,12 @@ struct fetch_pack_args { > > unsigned connectivity_checked:1; > > }; > > > > +struct object_info_args { > > + struct string_list *object_info_options; > > + const struct string_list *server_options; > > + struct oid_array *oids; > > +}; > > + > > /* > > * sought represents remote references that should be updated from. 
> > * On return, the names that were found on the remote will have been > > @@ -101,4 +109,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, > > */ > > int report_unmatched_refs(struct ref **sought, int nr_sought); > > > > +void send_object_info_request(int fd_out, struct object_info_args *args); > > + > > #endif > > diff --git a/transport-helper.c b/transport-helper.c > > index 9820947ab2..670d1e7068 100644 > > --- a/transport-helper.c > > +++ b/transport-helper.c > > @@ -697,13 +697,17 @@ static int fetch_refs(struct transport *transport, > > > > /* > > * If we reach here, then the server, the client, and/or the transport > > - * helper does not support protocol v2. --negotiate-only requires > > - * protocol v2. > > + * helper does not support protocol v2. --negotiate-only and cat-file remote-object-info > > + * require protocol v2. > > */ > > if (data->transport_options.acked_commits) { > > warning(_("--negotiate-only requires protocol v2")); > > return -1; > > } > > + if (transport->smart_options->object_info) { > > + // fail the command explicitly to avoid further commands input > > + die(_("remote-object-info requires protocol v2")); > > + } > > > > if (!data->get_refs_list_called) > > get_refs_list_using_list(transport, 0); > > diff --git a/transport.c b/transport.c > > index 83ddea8fbc..2847aa3f3c 100644 > > --- a/transport.c > > +++ b/transport.c > > @@ -363,6 +363,73 @@ static struct ref *handshake(struct transport *transport, int for_push, > > return refs; > > } > > > > +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data) > > +{ > > + int size_index = -1; > > + struct git_transport_data *data = transport->data; > > + struct object_info_args args; > > + struct packet_reader reader; > > + > > + memset(&args, 0, sizeof(args)); > > Nit: we could `struct object_info_args args = { 0 };` above instead. Thank you. Your suggestion has better readability and maintainability. 
I am adopting it in V2. > > > + args.server_options = transport->server_options; > > + args.object_info_options = transport->smart_options->object_info_options; > > + args.oids = transport->smart_options->object_info_oids; > > + > > + connect_setup(transport, 0); > > + packet_reader_init(&reader, data->fd[0], NULL, 0, > > + PACKET_READ_CHOMP_NEWLINE | > > + PACKET_READ_GENTLE_ON_EOF | > > + PACKET_READ_DIE_ON_ERR_PACKET); > > + data->version = discover_version(&reader); > > + > > + transport->hash_algo = reader.hash_algo; > > + > > + switch (data->version) { > > + case protocol_v2: > > + if (!server_supports_v2("object-info")) > > + return -1; > > + if (unsorted_string_list_has_string(args.object_info_options, "size") > > + && !server_supports_feature("object-info", "size", 0)) { > > + return -1; > > + } > > + send_object_info_request(data->fd[1], &args); > > + break; > > + case protocol_v1: > > + case protocol_v0: > > + die(_("wrong protocol version. expected v2")); > > + case protocol_unknown_version: > > + BUG("unknown protocol version"); > > + } > > + > > + for (size_t i = 0; i < args.object_info_options->nr; i++) { > > + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { > > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > > + return -1; > > + } > > + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { > > + if (!strcmp(reader.line, "size")) > > + size_index = i; > > + continue; > > + } > > + return -1; > > + } > > + > > + for (size_t i = 0; packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr; i++){ > > + struct string_list object_info_values = STRING_LIST_INIT_DUP; > > We need to also call `string_list_clear()` at the end of this block. 
> > > + > > + string_list_split(&object_info_values, reader.line, ' ', -1); > > + if (0 <= size_index) { > > + if (!strcmp(object_info_values.items[1 + size_index].string, "")) > > + die("object-info: not our ref %s", > > + object_info_values.items[0].string); > > + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); > > Perhaps `*object_info_data[i]->sizep = > strtoul(object_info_values.items[1 + size_index].string, NULL, 10);`? > > So, this is allocated in 'cat-file' and set here? Wouldn't it be nicer > to also do the alloc here? > > > Perhaps `*object_info_data[i]->sizep = > > strtoul(object_info_values.items[1 + size_index].string, NULL, 10);`? Thank you. Seems that `*(*object_info_data)[i].sizep` and `object_info_data[i]->sizep` are not the same. Given object_info_data is a pointer to a pointer to struct object_info, what `*(*object_info_data)[i].sizep` does is 1. *object_info_data dereferences object_info_data, yielding a pointer to the first element of the array of struct object_info. 2. (*object_info_data)[i] accesses the i-th element in the array of struct object_info that *object_info_data points to. 4, (*object_info_data)[i].sizep accesses the sizep member of the i-th struct object_info. 5. *(*object_info_data)[i].sizep dereferences the sizep pointer, yielding the value it points to. So we are interested in the array of struct object_info with its first element at *object_info_data. A more intuitive way of thinking it is that if we think object_info_data as a 2-D array, *(*object_info_data)[i] is accessing the object_info_data[0][i]. For `*object_info_data[i]->sizep`: 1. object_info_data[i] accesses the i-th element in the array of pointers to struct object_info. 2. object_info_data[i]->sizep accesses the sizep member of the i-th struct object_info that object_info_data[i] points to. 3. *object_info_data[i]->sizep dereferences the sizep pointer, yielding the value it points to. 
*object_info_data[i]->sizep will treat object_info_data as an array of pointers. In the mental model of 2D array, *object_info_data[i] is like object_info_data[i][0] Nevertheless, I do think using a pointer to a pointer is tricky and error-prone. In V2, I am refactoring the code to use just a pointer instead of a pointer to a pointer. For example, in transport.h git_transport_options { ... struct object_info *object_info_data; ... } > > So, this is allocated in 'cat-file' and set here? Wouldn't it be nicer > > to also do the alloc here? Thank you. Yes, this makes sense, V2 is refactoring the allocation into `fetch_object_info()` in transport.c > > + } > > + } > > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > > + > > + return 0; > > +} > > + > > static struct ref *get_refs_via_connect(struct transport *transport, int for_push, > > struct transport_ls_refs_options *options) > > { > > @@ -410,6 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport, > > struct ref *refs = NULL; > > struct fetch_pack_args args; > > struct ref *refs_tmp = NULL; > > + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); > > > > memset(&args, 0, sizeof(args)); > > args.uploadpack = data->options.uploadpack; > > @@ -436,11 +504,27 @@ static int fetch_refs_via_pack(struct transport *transport, > > args.server_options = transport->server_options; > > args.negotiation_tips = data->options.negotiation_tips; > > args.reject_shallow_remote = transport->smart_options->reject_shallow; > > - > > - if (!data->finished_handshake) { > > - int i; > > + args.object_info = transport->smart_options->object_info; > > + > > + if (transport->smart_options && transport->smart_options->object_info) { > > + struct ref *ref = object_info_refs; > > + > > + if (!fetch_object_info(transport, data->options.object_info_data)) > > + goto cleanup; > > + args.object_info_data = data->options.object_info_data; > > + args.quiet = 1; > > + 
args.no_progress = 1; > > + for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > > + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); > > + temp_ref->exact_oid = 1; > > + ref->next = temp_ref; > > + ref = ref->next; > > + } > > + transport->remote_refs = object_info_refs->next; > > + } else if (!data->finished_handshake) { > > int must_list_refs = 0; > > - for (i = 0; i < nr_heads; i++) { > > + for (int i = 0; i < nr_heads; i++) { > > if (!to_fetch[i]->exact_oid) { > > must_list_refs = 1; > > break; > > @@ -478,11 +562,18 @@ static int fetch_refs_via_pack(struct transport *transport, > > &transport->pack_lockfiles, data->version); > > > > data->finished_handshake = 0; > > + if (args.object_info) { > > + struct ref *ref_cpy_reader = object_info_refs->next; > > + for (int i = 0; ref_cpy_reader; i++) { > > + oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); > > + ref_cpy_reader = ref_cpy_reader->next; > > + } > > + } > > data->options.self_contained_and_connected = > > args.self_contained_and_connected; > > data->options.connectivity_checked = args.connectivity_checked; > > > > - if (!refs) > > + if (!refs && !args.object_info) > > ret = -1; > > if (report_unmatched_refs(to_fetch, nr_heads)) > > ret = -1; > > @@ -498,6 +589,7 @@ static int fetch_refs_via_pack(struct transport *transport, > > free_refs(refs_tmp); > > free_refs(refs); > > list_objects_filter_release(&args.filter_options); > > + free_refs(object_info_refs); > > Shouldn't we loop through `object_info_refs->next` and free all of them ? > Thank you. I think free_refs() has the logic to loop through object_info_refs->next and free the linked list. 
> > return ret; > > } > > > > diff --git a/transport.h b/transport.h > > index 6393cd9823..5a3cda1860 100644 > > --- a/transport.h > > +++ b/transport.h > > @@ -5,6 +5,7 @@ > > #include "remote.h" > > #include "list-objects-filter-options.h" > > #include "string-list.h" > > +#include "object-store.h" > > > > struct git_transport_options { > > unsigned thin : 1; > > @@ -30,6 +31,12 @@ struct git_transport_options { > > */ > > unsigned connectivity_checked:1; > > > > + /* > > + * Transport will attempt to pull only object-info. Fallbacks > > + * to pulling entire object if object-info is not supported. > > + */ > > + unsigned object_info : 1; > > + > > int depth; > > const char *deepen_since; > > const struct string_list *deepen_not; > > @@ -53,6 +60,10 @@ struct git_transport_options { > > * common commits to this oidset instead of fetching any packfiles. > > */ > > struct oidset *acked_commits; > > + > > + struct oid_array *object_info_oids; > > + struct object_info **object_info_data; > > + struct string_list *object_info_options; > > }; > > > > enum transport_family { > > -- > > 2.45.2 > > I'm wondering if we can add tests at this stage. Thank you. V2 is adding more tests to cover this.
diff --git a/fetch-pack.c b/fetch-pack.c index da0de9c537..d533cac1d8 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1345,6 +1345,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf, packet_buf_delim(req_buf); } +void send_object_info_request(int fd_out, struct object_info_args *args) +{ + struct strbuf req_buf = STRBUF_INIT; + + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); + + if (unsorted_string_list_has_string(args->object_info_options, "size")) + packet_buf_write(&req_buf, "size"); + + if (args->oids) { + for (size_t i = 0; i < args->oids->nr; i++) + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); + } + + packet_buf_flush(&req_buf); + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) + die_errno(_("unable to write request to remote")); + + strbuf_release(&req_buf); +} + static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, @@ -1682,6 +1703,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (args->depth > 0 || args->deepen_since || args->deepen_not) args->deepen = 1; + if (args->object_info) + state = FETCH_SEND_REQUEST; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: diff --git a/fetch-pack.h b/fetch-pack.h index 6775d26517..16e4dc0824 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -16,6 +16,7 @@ struct fetch_pack_args { const struct string_list *deepen_not; struct list_objects_filter_options filter_options; const struct string_list *server_options; + struct object_info **object_info_data; /* * If not NULL, during packfile negotiation, fetch-pack will send "have" @@ -42,6 +43,7 @@ struct fetch_pack_args { unsigned reject_shallow_remote:1; unsigned deepen:1; unsigned refetch:1; + unsigned object_info:1; /* * Indicate that the remote of this request is a promisor remote. 
The @@ -68,6 +70,12 @@ struct fetch_pack_args { unsigned connectivity_checked:1; }; +struct object_info_args { + struct string_list *object_info_options; + const struct string_list *server_options; + struct oid_array *oids; +}; + /* * sought represents remote references that should be updated from. * On return, the names that were found on the remote will have been @@ -101,4 +109,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, */ int report_unmatched_refs(struct ref **sought, int nr_sought); +void send_object_info_request(int fd_out, struct object_info_args *args); + #endif diff --git a/transport-helper.c b/transport-helper.c index 9820947ab2..670d1e7068 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -697,13 +697,17 @@ static int fetch_refs(struct transport *transport, /* * If we reach here, then the server, the client, and/or the transport - * helper does not support protocol v2. --negotiate-only requires - * protocol v2. + * helper does not support protocol v2. --negotiate-only and cat-file remote-object-info + * require protocol v2. 
*/ if (data->transport_options.acked_commits) { warning(_("--negotiate-only requires protocol v2")); return -1; } + if (transport->smart_options->object_info) { + // fail the command explicitly to avoid further commands input + die(_("remote-object-info requires protocol v2")); + } if (!data->get_refs_list_called) get_refs_list_using_list(transport, 0); diff --git a/transport.c b/transport.c index 83ddea8fbc..2847aa3f3c 100644 --- a/transport.c +++ b/transport.c @@ -363,6 +363,73 @@ static struct ref *handshake(struct transport *transport, int for_push, return refs; } +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data) +{ + int size_index = -1; + struct git_transport_data *data = transport->data; + struct object_info_args args; + struct packet_reader reader; + + memset(&args, 0, sizeof(args)); + args.server_options = transport->server_options; + args.object_info_options = transport->smart_options->object_info_options; + args.oids = transport->smart_options->object_info_oids; + + connect_setup(transport, 0); + packet_reader_init(&reader, data->fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_DIE_ON_ERR_PACKET); + data->version = discover_version(&reader); + + transport->hash_algo = reader.hash_algo; + + switch (data->version) { + case protocol_v2: + if (!server_supports_v2("object-info")) + return -1; + if (unsorted_string_list_has_string(args.object_info_options, "size") + && !server_supports_feature("object-info", "size", 0)) { + return -1; + } + send_object_info_request(data->fd[1], &args); + break; + case protocol_v1: + case protocol_v0: + die(_("wrong protocol version. 
expected v2")); + case protocol_unknown_version: + BUG("unknown protocol version"); + } + + for (size_t i = 0; i < args.object_info_options->nr; i++) { + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); + return -1; + } + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { + if (!strcmp(reader.line, "size")) + size_index = i; + continue; + } + return -1; + } + + for (size_t i = 0; packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr; i++){ + struct string_list object_info_values = STRING_LIST_INIT_DUP; + + string_list_split(&object_info_values, reader.line, ' ', -1); + if (0 <= size_index) { + if (!strcmp(object_info_values.items[1 + size_index].string, "")) + die("object-info: not our ref %s", + object_info_values.items[0].string); + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); + } + } + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); + + return 0; +} + static struct ref *get_refs_via_connect(struct transport *transport, int for_push, struct transport_ls_refs_options *options) { @@ -410,6 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport, struct ref *refs = NULL; struct fetch_pack_args args; struct ref *refs_tmp = NULL; + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); memset(&args, 0, sizeof(args)); args.uploadpack = data->options.uploadpack; @@ -436,11 +504,27 @@ static int fetch_refs_via_pack(struct transport *transport, args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; - - if (!data->finished_handshake) { - int i; + args.object_info = transport->smart_options->object_info; + + if (transport->smart_options && transport->smart_options->object_info) { + struct 
ref *ref = object_info_refs; + + if (!fetch_object_info(transport, data->options.object_info_data)) + goto cleanup; + args.object_info_data = data->options.object_info_data; + args.quiet = 1; + args.no_progress = 1; + for (size_t i = 0; i < transport->smart_options->object_info_oids->nr; i++) { + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); + temp_ref->exact_oid = 1; + ref->next = temp_ref; + ref = ref->next; + } + transport->remote_refs = object_info_refs->next; + } else if (!data->finished_handshake) { int must_list_refs = 0; - for (i = 0; i < nr_heads; i++) { + for (int i = 0; i < nr_heads; i++) { if (!to_fetch[i]->exact_oid) { must_list_refs = 1; break; @@ -478,11 +562,18 @@ static int fetch_refs_via_pack(struct transport *transport, &transport->pack_lockfiles, data->version); data->finished_handshake = 0; + if (args.object_info) { + struct ref *ref_cpy_reader = object_info_refs->next; + for (int i = 0; ref_cpy_reader; i++) { + oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); + ref_cpy_reader = ref_cpy_reader->next; + } + } data->options.self_contained_and_connected = args.self_contained_and_connected; data->options.connectivity_checked = args.connectivity_checked; - if (!refs) + if (!refs && !args.object_info) ret = -1; if (report_unmatched_refs(to_fetch, nr_heads)) ret = -1; @@ -498,6 +589,7 @@ static int fetch_refs_via_pack(struct transport *transport, free_refs(refs_tmp); free_refs(refs); list_objects_filter_release(&args.filter_options); + free_refs(object_info_refs); return ret; } diff --git a/transport.h b/transport.h index 6393cd9823..5a3cda1860 100644 --- a/transport.h +++ b/transport.h @@ -5,6 +5,7 @@ #include "remote.h" #include "list-objects-filter-options.h" #include "string-list.h" +#include "object-store.h" struct git_transport_options { unsigned thin : 1; @@ -30,6 +31,12 @@ struct 
git_transport_options { */ unsigned connectivity_checked:1; + /* + * Transport will attempt to pull only object-info. Fallbacks + * to pulling entire object if object-info is not supported. + */ + unsigned object_info : 1; + int depth; const char *deepen_since; const struct string_list *deepen_not; @@ -53,6 +60,10 @@ struct git_transport_options { * common commits to this oidset instead of fetching any packfiles. */ struct oidset *acked_commits; + + struct oid_array *object_info_oids; + struct object_info **object_info_data; + struct string_list *object_info_options; }; enum transport_family {