diff mbox series

[1/5] upload-pack: look up "want" lines via commit-graph

Message ID ca5e136cca495c7d927e99f5ae8a672d93823eea.1645619224.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series fetch: more optimizations for mirror fetches | expand

Commit Message

Patrick Steinhardt Feb. 23, 2022, 12:35 p.m. UTC
During packfile negotiation the client will send "want" and "want-ref"
lines to the server to tell it which objects it is interested in. The
server-side parses each of those and looks them up to see whether it
actually has requested objects. This lookup is performed by calling
`parse_object()` directly, which thus hits the object database. In the
general case though most of the objects the client requests will be
commits. We can thus try to look up the object via the commit-graph
opportunistically, which is much faster than doing the same via the
object database.

Refactor parsing of both "want" and "want-ref" lines to do so.

The following benchmark is executed in a repository with a huge number
of references. It uses cached request from git-fetch(1) as input and
contains about 876,000 "want" lines:

    Benchmark 1: git-upload-pack (HEAD~)
      Time (mean ± σ):      7.113 s ±  0.028 s    [User: 6.900 s, System: 0.662 s]
      Range (min … max):    7.072 s …  7.168 s    10 runs

    Benchmark 2: git-upload-pack (HEAD)
      Time (mean ± σ):      6.622 s ±  0.061 s    [User: 6.452 s, System: 0.650 s]
      Range (min … max):    6.535 s …  6.727 s    10 runs

    Summary
      'git-upload-pack (HEAD)' ran
        1.07 ± 0.01 times faster than 'git-upload-pack (HEAD~)'

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 upload-pack.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Comments

Derrick Stolee Feb. 23, 2022, 2:13 p.m. UTC | #1
On 2/23/2022 7:35 AM, Patrick Steinhardt wrote:

> The following benchmark is executed in a repository with a huge number
> of references. It uses cached request from git-fetch(1) as input and
> contains about 876,000 "want" lines:
> 
>     Benchmark 1: git-upload-pack (HEAD~)
>       Time (mean ± σ):      7.113 s ±  0.028 s    [User: 6.900 s, System: 0.662 s]
>       Range (min … max):    7.072 s …  7.168 s    10 runs
> 
>     Benchmark 2: git-upload-pack (HEAD)
>       Time (mean ± σ):      6.622 s ±  0.061 s    [User: 6.452 s, System: 0.650 s]
>       Range (min … max):    6.535 s …  6.727 s    10 runs
> 
>     Summary
>       'git-upload-pack (HEAD)' ran
>         1.07 ± 0.01 times faster than 'git-upload-pack (HEAD~)'

Nice!

> -		o = parse_object(the_repository, &oid);
> +		commit = lookup_commit_in_graph(the_repository, &oid);
> +		if (commit)
> +			o = &commit->object;
> +		else
> +			o = parse_object(the_repository, &oid);
> +

This is a neat trick. I see that we've also done this trick in
revision.c:get_reference(). Perhaps it is worth creating a helper,
maybe named parse_probably_commit()?

>  		if (!o) {
>  			packet_writer_error(writer,
>  					    "upload-pack: not our ref %s",
> @@ -1434,7 +1440,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
>  	if (skip_prefix(line, "want-ref ", &refname_nons)) {
>  		struct object_id oid;
>  		struct string_list_item *item;
> -		struct object *o;
> +		struct object *o = NULL;
>  		struct strbuf refname = STRBUF_INIT;
>  
>  		strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
> @@ -1448,7 +1454,15 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
>  		item = string_list_append(wanted_refs, refname_nons);
>  		item->util = oiddup(&oid);
>  
> -		o = parse_object_or_die(&oid, refname_nons);
> +		if (!starts_with(refname_nons, "refs/tags/")) {
> +			struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
> +			if (commit)
> +				o = &commit->object;
> +		}
> +
> +		if (!o)
> +			o = parse_object_or_die(&oid, refname_nons);
> +

Even here, we _could_ use a parse_probably_commit() helper
inside the if (!starts_with(...)) block, even though we would
still need the if (!o) check later.

Thanks,
-Stolee
Patrick Steinhardt March 1, 2022, 8:43 a.m. UTC | #2
On Wed, Feb 23, 2022 at 09:13:53AM -0500, Derrick Stolee wrote:
> On 2/23/2022 7:35 AM, Patrick Steinhardt wrote:
> 
> > The following benchmark is executed in a repository with a huge number
> > of references. It uses cached request from git-fetch(1) as input and
> > contains about 876,000 "want" lines:
> > 
> >     Benchmark 1: git-upload-pack (HEAD~)
> >       Time (mean ± σ):      7.113 s ±  0.028 s    [User: 6.900 s, System: 0.662 s]
> >       Range (min … max):    7.072 s …  7.168 s    10 runs
> > 
> >     Benchmark 2: git-upload-pack (HEAD)
> >       Time (mean ± σ):      6.622 s ±  0.061 s    [User: 6.452 s, System: 0.650 s]
> >       Range (min … max):    6.535 s …  6.727 s    10 runs
> > 
> >     Summary
> >       'git-upload-pack (HEAD)' ran
> >         1.07 ± 0.01 times faster than 'git-upload-pack (HEAD~)'
> 
> Nice!
> 
> > -		o = parse_object(the_repository, &oid);
> > +		commit = lookup_commit_in_graph(the_repository, &oid);
> > +		if (commit)
> > +			o = &commit->object;
> > +		else
> > +			o = parse_object(the_repository, &oid);
> > +
> 
> This is a neat trick. I see that we've also done this trick in
> revision.c:get_reference(). Perhaps it is worth creating a helper,
> maybe named parse_probably_commit()?

That might be a good idea, thanks. I'll have a look at what the end
result would look like.

Patrick

> >  		if (!o) {
> >  			packet_writer_error(writer,
> >  					    "upload-pack: not our ref %s",
> > @@ -1434,7 +1440,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
> >  	if (skip_prefix(line, "want-ref ", &refname_nons)) {
> >  		struct object_id oid;
> >  		struct string_list_item *item;
> > -		struct object *o;
> > +		struct object *o = NULL;
> >  		struct strbuf refname = STRBUF_INIT;
> >  
> >  		strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
> > @@ -1448,7 +1454,15 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
> >  		item = string_list_append(wanted_refs, refname_nons);
> >  		item->util = oiddup(&oid);
> >  
> > -		o = parse_object_or_die(&oid, refname_nons);
> > +		if (!starts_with(refname_nons, "refs/tags/")) {
> > +			struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
> > +			if (commit)
> > +				o = &commit->object;
> > +		}
> > +
> > +		if (!o)
> > +			o = parse_object_or_die(&oid, refname_nons);
> > +
> 
> Even here, we _could_ use a parse_probably_commit() helper
> inside the if (!starts_with(...)) block, even though we would
> still need the if (!o) check later.
> 
> Thanks,
> -Stolee
Patrick Steinhardt March 1, 2022, 9:24 a.m. UTC | #3
On Tue, Mar 01, 2022 at 09:43:19AM +0100, Patrick Steinhardt wrote:
> On Wed, Feb 23, 2022 at 09:13:53AM -0500, Derrick Stolee wrote:
> > On 2/23/2022 7:35 AM, Patrick Steinhardt wrote:
> > 
> > > The following benchmark is executed in a repository with a huge number
> > > of references. It uses cached request from git-fetch(1) as input and
> > > contains about 876,000 "want" lines:
> > > 
> > >     Benchmark 1: git-upload-pack (HEAD~)
> > >       Time (mean ± σ):      7.113 s ±  0.028 s    [User: 6.900 s, System: 0.662 s]
> > >       Range (min … max):    7.072 s …  7.168 s    10 runs
> > > 
> > >     Benchmark 2: git-upload-pack (HEAD)
> > >       Time (mean ± σ):      6.622 s ±  0.061 s    [User: 6.452 s, System: 0.650 s]
> > >       Range (min … max):    6.535 s …  6.727 s    10 runs
> > > 
> > >     Summary
> > >       'git-upload-pack (HEAD)' ran
> > >         1.07 ± 0.01 times faster than 'git-upload-pack (HEAD~)'
> > 
> > Nice!
> > 
> > > -		o = parse_object(the_repository, &oid);
> > > +		commit = lookup_commit_in_graph(the_repository, &oid);
> > > +		if (commit)
> > > +			o = &commit->object;
> > > +		else
> > > +			o = parse_object(the_repository, &oid);
> > > +
> > 
> > This is a neat trick. I see that we've also done this trick in
> > revision.c:get_reference(). Perhaps it is worth creating a helper,
> > maybe named parse_probably_commit()?
> 
> That might be a good idea, thanks. I'll have a look at what the end
> result would look like.
> 
> Patrick

I had a look at existing callsites which use `lookup_commit_in_graph()`,
but I found that it wasn't easily possible to convert them all to use a
new helper like you propose. Most of them have some custom logic like
skipping `parse_object()` if it's part of a promisor pack, so I really
only found two locations where such a new helper could be used without
also adding and supporting flags. I don't really think that's worth it
for now.

Patrick

> > >  		if (!o) {
> > >  			packet_writer_error(writer,
> > >  					    "upload-pack: not our ref %s",
> > > @@ -1434,7 +1440,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
> > >  	if (skip_prefix(line, "want-ref ", &refname_nons)) {
> > >  		struct object_id oid;
> > >  		struct string_list_item *item;
> > > -		struct object *o;
> > > +		struct object *o = NULL;
> > >  		struct strbuf refname = STRBUF_INIT;
> > >  
> > >  		strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
> > > @@ -1448,7 +1454,15 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
> > >  		item = string_list_append(wanted_refs, refname_nons);
> > >  		item->util = oiddup(&oid);
> > >  
> > > -		o = parse_object_or_die(&oid, refname_nons);
> > > +		if (!starts_with(refname_nons, "refs/tags/")) {
> > > +			struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
> > > +			if (commit)
> > > +				o = &commit->object;
> > > +		}
> > > +
> > > +		if (!o)
> > > +			o = parse_object_or_die(&oid, refname_nons);
> > > +
> > 
> > Even here, we _could_ use a parse_probably_commit() helper
> > inside the if (!starts_with(...)) block, even though we would
> > still need the if (!o) check later.
> > 
> > Thanks,
> > -Stolee
Derrick Stolee March 2, 2022, 6:53 p.m. UTC | #4
On 3/1/2022 4:24 AM, Patrick Steinhardt wrote:
> On Tue, Mar 01, 2022 at 09:43:19AM +0100, Patrick Steinhardt wrote:
>> On Wed, Feb 23, 2022 at 09:13:53AM -0500, Derrick Stolee wrote:
>>> On 2/23/2022 7:35 AM, Patrick Steinhardt wrote:

>>>> -		o = parse_object(the_repository, &oid);
>>>> +		commit = lookup_commit_in_graph(the_repository, &oid);
>>>> +		if (commit)
>>>> +			o = &commit->object;
>>>> +		else
>>>> +			o = parse_object(the_repository, &oid);
>>>> +
>>>
>>> This is a neat trick. I see that we've also done this trick in
>>> revision.c:get_reference(). Perhaps it is worth creating a helper,
>>> maybe named parse_probably_commit()?
>>
>> That might be a good idea, thanks. I'll have a look at what the end
>> result would look like.
>>
>> Patrick
> 
> I had a look at existing callsites which use `lookup_commit_in_graph()`,
> but I found that it wasn't easily possible to convert them all to use a
> new helper like you propose. Most of them have some custom logic like
> skipping `parse_object()` if it's part of a promisor pack, so I really
> only found two locations where such a new helper could be used without
> also adding and supporting flags. I don't really think that's worth it
> for now.

Thanks for taking the time to look into it.

-Stolee
diff mbox series

Patch

diff --git a/upload-pack.c b/upload-pack.c
index 8acc98741b..3a851b3606 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1400,13 +1400,19 @@  static int parse_want(struct packet_writer *writer, const char *line,
 	const char *arg;
 	if (skip_prefix(line, "want ", &arg)) {
 		struct object_id oid;
+		struct commit *commit;
 		struct object *o;
 
 		if (get_oid_hex(arg, &oid))
 			die("git upload-pack: protocol error, "
 			    "expected to get oid, not '%s'", line);
 
-		o = parse_object(the_repository, &oid);
+		commit = lookup_commit_in_graph(the_repository, &oid);
+		if (commit)
+			o = &commit->object;
+		else
+			o = parse_object(the_repository, &oid);
+
 		if (!o) {
 			packet_writer_error(writer,
 					    "upload-pack: not our ref %s",
@@ -1434,7 +1440,7 @@  static int parse_want_ref(struct packet_writer *writer, const char *line,
 	if (skip_prefix(line, "want-ref ", &refname_nons)) {
 		struct object_id oid;
 		struct string_list_item *item;
-		struct object *o;
+		struct object *o = NULL;
 		struct strbuf refname = STRBUF_INIT;
 
 		strbuf_addf(&refname, "%s%s", get_git_namespace(), refname_nons);
@@ -1448,7 +1454,15 @@  static int parse_want_ref(struct packet_writer *writer, const char *line,
 		item = string_list_append(wanted_refs, refname_nons);
 		item->util = oiddup(&oid);
 
-		o = parse_object_or_die(&oid, refname_nons);
+		if (!starts_with(refname_nons, "refs/tags/")) {
+			struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
+			if (commit)
+				o = &commit->object;
+		}
+
+		if (!o)
+			o = parse_object_or_die(&oid, refname_nons);
+
 		if (!(o->flags & WANTED)) {
 			o->flags |= WANTED;
 			add_object_array(o, NULL, want_obj);