diff mbox series

[1/2] connected: verify promisor-ness of partial clone

Message ID 6a4f704e475fe1669e63731333fce9ed09d17d0c.1578802317.git.jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Skip a connectivity check during fetch --filter | expand

Commit Message

Jonathan Tan Jan. 12, 2020, 4:15 a.m. UTC
Commit dfa33a298d ("clone: do faster object check for partial clones",
2019-04-21) optimized the connectivity check done when cloning with
--filter to check only the existence of objects directly pointed to by
refs. But this is not sufficient: they also need to be promisor objects.
Make this check more robust by instead checking that these objects are
promisor objects, that is, they appear in a promisor pack.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 builtin/clone.c |  5 +++--
 connected.c     | 19 ++++++++++++++-----
 connected.h     | 11 ++++++-----
 3 files changed, 23 insertions(+), 12 deletions(-)

Comments

Jonathan Nieder Jan. 29, 2020, 8:41 p.m. UTC | #1
Jonathan Tan wrote:

> Commit dfa33a298d ("clone: do faster object check for partial clones",
> 2019-04-21) optimized the connectivity check done when cloning with
> --filter to check only the existence of objects directly pointed to by
> refs. But this is not sufficient: they also need to be promisor objects.
> Make this check more robust by instead checking that these objects are
> promisor objects, that is, they appear in a promisor pack.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  builtin/clone.c |  5 +++--
>  connected.c     | 19 ++++++++++++++-----
>  connected.h     | 11 ++++++-----
>  3 files changed, 23 insertions(+), 12 deletions(-)

Good call.  Sorry for the slow review.

[...]
> --- a/connected.c
> +++ b/connected.c
> @@ -52,19 +52,28 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
>  		strbuf_release(&idx_file);
>  	}
>  
> -	if (opt->check_refs_only) {
> +	if (opt->check_refs_are_promisor_objects_only) {
>  		/*
>  		 * For partial clones, we don't want to have to do a regular
>  		 * connectivity check because we have to enumerate and exclude
>  		 * all promisor objects (slow), and then the connectivity check
>  		 * itself becomes a no-op because in a partial clone every
>  		 * object is a promisor object. Instead, just make sure we
> -		 * received the objects pointed to by each wanted ref.
> +		 * received, in a promisor packfile, the objects pointed to by
> +		 * each wanted ref.
>  		 */
>  		do {
> -			if (!repo_has_object_file_with_flags(the_repository, &oid,
> -							     OBJECT_INFO_SKIP_FETCH_OBJECT))
> -				return 1;
> +			struct packed_git *p;
> +
> +			for (p = get_all_packs(the_repository); p; p = p->next) {
> +				if (!p->pack_promisor)
> +					continue;
> +				if (find_pack_entry_one(oid.hash, p))
> +					goto promisor_pack_found;
> +			}
> +			return 1;
> +promisor_pack_found:
> +			;
>  		} while (!fn(cb_data, &oid));
>  		return 0;

Yep, does what it says on the tin.

Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
diff mbox series

Patch

diff --git a/builtin/clone.c b/builtin/clone.c
index 0fc89ae2b9..0516181052 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -673,7 +673,7 @@  static void update_remote_refs(const struct ref *refs,
 			       const char *msg,
 			       struct transport *transport,
 			       int check_connectivity,
-			       int check_refs_only)
+			       int check_refs_are_promisor_objects_only)
 {
 	const struct ref *rm = mapped_refs;
 
@@ -682,7 +682,8 @@  static void update_remote_refs(const struct ref *refs,
 
 		opt.transport = transport;
 		opt.progress = transport->progress;
-		opt.check_refs_only = !!check_refs_only;
+		opt.check_refs_are_promisor_objects_only =
+			!!check_refs_are_promisor_objects_only;
 
 		if (check_connected(iterate_ref_map, &rm, &opt))
 			die(_("remote did not send all necessary objects"));
diff --git a/connected.c b/connected.c
index c337f5f7f4..7e9bd1bc62 100644
--- a/connected.c
+++ b/connected.c
@@ -52,19 +52,28 @@  int check_connected(oid_iterate_fn fn, void *cb_data,
 		strbuf_release(&idx_file);
 	}
 
-	if (opt->check_refs_only) {
+	if (opt->check_refs_are_promisor_objects_only) {
 		/*
 		 * For partial clones, we don't want to have to do a regular
 		 * connectivity check because we have to enumerate and exclude
 		 * all promisor objects (slow), and then the connectivity check
 		 * itself becomes a no-op because in a partial clone every
 		 * object is a promisor object. Instead, just make sure we
-		 * received the objects pointed to by each wanted ref.
+		 * received, in a promisor packfile, the objects pointed to by
+		 * each wanted ref.
 		 */
 		do {
-			if (!repo_has_object_file_with_flags(the_repository, &oid,
-							     OBJECT_INFO_SKIP_FETCH_OBJECT))
-				return 1;
+			struct packed_git *p;
+
+			for (p = get_all_packs(the_repository); p; p = p->next) {
+				if (!p->pack_promisor)
+					continue;
+				if (find_pack_entry_one(oid.hash, p))
+					goto promisor_pack_found;
+			}
+			return 1;
+promisor_pack_found:
+			;
 		} while (!fn(cb_data, &oid));
 		return 0;
 	}
diff --git a/connected.h b/connected.h
index ce2e7d8f2e..eba5c261ba 100644
--- a/connected.h
+++ b/connected.h
@@ -48,12 +48,13 @@  struct check_connected_options {
 	unsigned is_deepening_fetch : 1;
 
 	/*
-	 * If non-zero, only check the top-level objects referenced by the
-	 * wanted refs (passed in as cb_data). This is useful for partial
-	 * clones, where enumerating and excluding all promisor objects is very
-	 * slow and the commit-walk itself becomes a no-op.
+	 * If non-zero, only check that the top-level objects referenced by the
+	 * wanted refs (passed in as cb_data) are promisor objects. This is
+	 * useful for partial clones, where enumerating and excluding all
+	 * promisor objects is very slow and the commit-walk itself becomes a
+	 * no-op.
 	 */
-	unsigned check_refs_only : 1;
+	unsigned check_refs_are_promisor_objects_only : 1;
 };
 
 #define CHECK_CONNECTED_INIT { 0 }