diff mbox series

[14/15] upload-pack.c: avoid enumerating hidden refs where possible

Message ID 44bbf85e73676b2c89a82c09f7d355122ce6e805.1683581621.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series refs: implement skip lists for packed backend | expand

Commit Message

Taylor Blau May 8, 2023, 10 p.m. UTC
In a similar fashion as a previous commit, teach `upload-pack` to avoid
enumerating hidden references where possible.

Note, however, that there are certain cases where cannot avoid
enumerating even hidden references, in particular when either of:

  - `uploadpack.allowTipSHA1InWant`, or
  - `uploadpack.allowReachableSHA1InWant`

are set, corresponding to `ALLOW_TIP_SHA1` and `ALLOW_REACHABLE_SHA1`,
respectively.

When either of these bits are set, upload-pack's `is_our_ref()` function
needs to consider the `HIDDEN_REF` bit of the referent's object flags.
So we must visit all references, including the hidden ones, in order to
mark their referents with the `HIDDEN_REF` bit.

When neither `ALLOW_TIP_SHA1` nor `ALLOW_REACHABLE_SHA1` are set, the
`is_our_ref()` function considers only the `OUR_REF` bit, and not the
`HIDDEN_REF` one. `OUR_REF` is applied via `mark_our_ref()`, and only
to objects at the tips of non-hidden references, so we do not need to
visit hidden references in this case.

When neither of those bits are set, `upload-pack` can potentially avoid
enumerating a large number of references. In the same example as a
previous commit (linux.git with one hidden reference per commit,
"refs/pull/N"):

    $ printf 0000 >in
    $ hyperfine --warmup=1 \
      'git -c transfer.hideRefs=refs/pull upload-pack . <in' \
      'git.compile -c transfer.hideRefs=refs/pull -c uploadpack.allowTipSHA1InWant upload-pack . <in' \
      'git.compile -c transfer.hideRefs=refs/pull upload-pack . <in'
    Benchmark 1: git -c transfer.hideRefs=refs/pull upload-pack . <in
      Time (mean ± σ):     406.9 ms ±   1.1 ms    [User: 357.3 ms, System: 49.5 ms]
      Range (min … max):   405.7 ms … 409.2 ms    10 runs

    Benchmark 2: git.compile -c transfer.hideRefs=refs/pull -c uploadpack.allowTipSHA1InWant upload-pack . <in
      Time (mean ± σ):     406.5 ms ±   1.3 ms    [User: 356.5 ms, System: 49.9 ms]
      Range (min … max):   404.6 ms … 408.8 ms    10 runs

    Benchmark 3: git.compile -c transfer.hideRefs=refs/pull upload-pack . <in
      Time (mean ± σ):       4.7 ms ±   0.2 ms    [User: 0.7 ms, System: 3.9 ms]
      Range (min … max):     4.3 ms …   6.1 ms    472 runs

    Summary
      'git.compile -c transfer.hideRefs=refs/pull upload-pack . <in' ran
       86.62 ± 4.33 times faster than 'git.compile -c transfer.hideRefs=refs/pull -c uploadpack.allowTipSHA1InWant upload-pack . <in'
       86.70 ± 4.33 times faster than 'git -c transfer.hideRefs=refs/pull upload-pack . <in'

As above, we must visit every reference when
uploadPack.allowTipSHA1InWant is set. But when it is unset, we can visit
far fewer references.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 upload-pack.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

Comments

Patrick Steinhardt May 9, 2023, 3:15 p.m. UTC | #1
On Mon, May 08, 2023 at 06:00:26PM -0400, Taylor Blau wrote:
[snip]
> diff --git a/upload-pack.c b/upload-pack.c
> index 7c646ea5bd..0162fffce0 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -601,11 +601,32 @@ static int get_common_commits(struct upload_pack_data *data,
>  	}
>  }
>  
> +static int allow_hidden_refs(enum allow_uor allow_uor)
> +{
> +	return allow_uor & (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1);
> +}
> +
> +static void for_each_namespaced_ref_1(each_ref_fn fn,
> +				      struct upload_pack_data *data)

I know it's common practice in the Git project, but personally I tend to
fight with functions that have a `_1` suffix. You simply cannot tell
what the difference is to the non-suffixed variant without checking its
declaration.

`for_each_namespaced_ref_with_optional_hidden_refs()` is definitely a
mouthful though, and I can't really think of something better either.

> +{
> +	/*
> +	 * If `data->allow_uor` allows updating hidden refs, we need to
> +	 * mark all references (including hidden ones), to check in
> +	 * `is_our_ref()` below.

Doesn't this influence whether somebody can _fetch_ objects pointed to
by the hidden refs instead of _updating_ them?

Patrick
Taylor Blau May 9, 2023, 9:34 p.m. UTC | #2
On Tue, May 09, 2023 at 05:15:50PM +0200, Patrick Steinhardt wrote:
> On Mon, May 08, 2023 at 06:00:26PM -0400, Taylor Blau wrote:
> > @@ -601,11 +601,32 @@ static int get_common_commits(struct upload_pack_data *data,
> >  	}
> >  }
> >
> > +static int allow_hidden_refs(enum allow_uor allow_uor)
> > +{
> > +	return allow_uor & (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1);
> > +}
> > +
> > +static void for_each_namespaced_ref_1(each_ref_fn fn,
> > +				      struct upload_pack_data *data)
>
> I know it's common practice in the Git project, but personally I tend to
> fight with functions that have a `_1` suffix. You simply cannot tell
> what the difference is to the non-suffixed variant without checking its
> declaration.
>
> `for_each_namespaced_ref_with_optional_hidden_refs()` is definitely a
> mouthful though, and I can't really think of something better either.

Yeah, I know. It's not my favorite convention, either, but I also
couldn't come up with anything shorter ;-).

> > +{
> > +	/*
> > +	 * If `data->allow_uor` allows updating hidden refs, we need to
> > +	 * mark all references (including hidden ones), to check in
> > +	 * `is_our_ref()` below.
>
> Doesn't this influence whether somebody can _fetch_ objects pointed to
> by the hidden refs instead of _updating_ them?

Oops, yes. Thanks for catching my obvious typo.

Thanks,
Taylor
diff mbox series

Patch

diff --git a/upload-pack.c b/upload-pack.c
index 7c646ea5bd..0162fffce0 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -601,11 +601,32 @@  static int get_common_commits(struct upload_pack_data *data,
 	}
 }
 
+static int allow_hidden_refs(enum allow_uor allow_uor)
+{
+	return allow_uor & (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1);
+}
+
+static void for_each_namespaced_ref_1(each_ref_fn fn,
+				      struct upload_pack_data *data)
+{
+	/*
+	 * If `data->allow_uor` allows updating hidden refs, we need to
+	 * mark all references (including hidden ones), to check in
+	 * `is_our_ref()` below.
+	 *
+	 * Otherwise, we only care about whether each reference's object
+	 * has the OUR_REF bit set or not, so do not need to visit
+	 * hidden references.
+	 */
+	if (allow_hidden_refs(data->allow_uor))
+		for_each_namespaced_ref(NULL, fn, data);
+	else
+		for_each_namespaced_ref(data->hidden_refs.v, fn, data);
+}
+
 static int is_our_ref(struct object *o, enum allow_uor allow_uor)
 {
-	int allow_hidden_ref = (allow_uor &
-				(ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1));
-	return o->flags & ((allow_hidden_ref ? HIDDEN_REF : 0) | OUR_REF);
+	return o->flags & ((allow_hidden_refs(allow_uor) ? HIDDEN_REF : 0) | OUR_REF);
 }
 
 /*
@@ -854,7 +875,7 @@  static void deepen(struct upload_pack_data *data, int depth)
 		 * marked with OUR_REF.
 		 */
 		head_ref_namespaced(check_ref, data);
-		for_each_namespaced_ref(NULL, check_ref, data);
+		for_each_namespaced_ref_1(check_ref, data);
 
 		get_reachable_list(data, &reachable_shallows);
 		result = get_shallow_commits(&reachable_shallows,
@@ -1378,7 +1399,7 @@  void upload_pack(const int advertise_refs, const int stateless_rpc,
 		if (advertise_refs)
 			data.no_done = 1;
 		head_ref_namespaced(send_ref, &data);
-		for_each_namespaced_ref(NULL, send_ref, &data);
+		for_each_namespaced_ref_1(send_ref, &data);
 		/*
 		 * fflush stdout before calling advertise_shallow_grafts because send_ref
 		 * uses stdio.
@@ -1388,7 +1409,7 @@  void upload_pack(const int advertise_refs, const int stateless_rpc,
 		packet_flush(1);
 	} else {
 		head_ref_namespaced(check_ref, &data);
-		for_each_namespaced_ref(NULL, check_ref, &data);
+		for_each_namespaced_ref_1(check_ref, &data);
 	}
 
 	if (!advertise_refs) {