diff mbox series

[1/2] unpack-trees: refactor prefetching code

Message ID f502997d159c7a30862fab3c3b443291539b6f29.1627066238.git.jonathantanmy@google.com (mailing list archive)
State Accepted
Commit b2896d27391afcbc990439e63972bb33693a7d6b
Headers show
Series Another partial clone prefetch | expand

Commit Message

Jonathan Tan July 23, 2021, 6:52 p.m. UTC
Refactor the prefetching code in unpack-trees.c into its own function,
because it will be used elsewhere in a subsequent commit.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 cache.h        |  9 +++++++++
 read-cache.c   | 23 +++++++++++++++++++++++
 unpack-trees.c | 27 ++++++++-------------------
 3 files changed, 40 insertions(+), 19 deletions(-)

Comments

Elijah Newren July 23, 2021, 8:26 p.m. UTC | #1
On Fri, Jul 23, 2021 at 11:55 AM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Refactor the prefetching code in unpack-trees.c into its own function,
> because it will be used elsewhere in a subsequent commit.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  cache.h        |  9 +++++++++
>  read-cache.c   | 23 +++++++++++++++++++++++
>  unpack-trees.c | 27 ++++++++-------------------
>  3 files changed, 40 insertions(+), 19 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index ba04ff8bd3..6f952e22c6 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -410,6 +410,15 @@ struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s
>   */
>  void validate_cache_entries(const struct index_state *istate);
>
> +/*
> + * Bulk prefetch all missing cache entries that are not GITLINKs and that match
> + * the given predicate. This function should only be called if
> + * has_promisor_remote() returns true.
> + */
> +typedef int (*must_prefetch_predicate)(const struct cache_entry *);
> +void prefetch_cache_entries(const struct index_state *istate,
> +                           must_prefetch_predicate must_prefetch);
> +
>  #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS
>  extern struct index_state the_index;
>
> diff --git a/read-cache.c b/read-cache.c
> index ba2b012a6c..4e396bf17f 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -27,6 +27,7 @@
>  #include "progress.h"
>  #include "sparse-index.h"
>  #include "csum-file.h"
> +#include "promisor-remote.h"
>
>  /* Mask for the name length in ce_flags in the on-disk index */
>
> @@ -3657,3 +3658,25 @@ static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta
>                 strbuf_add(sb, &buffer, sizeof(uint32_t));
>         }
>  }
> +
> +void prefetch_cache_entries(const struct index_state *istate,
> +                           must_prefetch_predicate must_prefetch)
> +{
> +       int i;
> +       struct oid_array to_fetch = OID_ARRAY_INIT;
> +
> +       for (i = 0; i < istate->cache_nr; i++) {
> +               struct cache_entry *ce = istate->cache[i];
> +
> +               if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce))
> +                       continue;
> +               if (!oid_object_info_extended(the_repository, &ce->oid,
> +                                             NULL,
> +                                             OBJECT_INFO_FOR_PREFETCH))
> +                       continue;
> +               oid_array_append(&to_fetch, &ce->oid);
> +       }
> +       promisor_remote_get_direct(the_repository,
> +                                  to_fetch.oid, to_fetch.nr);
> +       oid_array_clear(&to_fetch);
> +}
> diff --git a/unpack-trees.c b/unpack-trees.c
> index f88a69f8e7..ed92794032 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -392,6 +392,11 @@ static void report_collided_checkout(struct index_state *index)
>         string_list_clear(&list, 0);
>  }
>
> +static int must_checkout(const struct cache_entry *ce)
> +{
> +       return ce->ce_flags & CE_UPDATE;
> +}
> +
>  static int check_updates(struct unpack_trees_options *o,
>                          struct index_state *index)
>  {
> @@ -442,28 +447,12 @@ static int check_updates(struct unpack_trees_options *o,
>         if (should_update_submodules())
>                 load_gitmodules_file(index, &state);
>
> -       if (has_promisor_remote()) {
> +       if (has_promisor_remote())
>                 /*
>                  * Prefetch the objects that are to be checked out in the loop
>                  * below.
>                  */
> -               struct oid_array to_fetch = OID_ARRAY_INIT;
> -               for (i = 0; i < index->cache_nr; i++) {
> -                       struct cache_entry *ce = index->cache[i];
> -
> -                       if (!(ce->ce_flags & CE_UPDATE) ||
> -                           S_ISGITLINK(ce->ce_mode))
> -                               continue;
> -                       if (!oid_object_info_extended(the_repository, &ce->oid,
> -                                                     NULL,
> -                                                     OBJECT_INFO_FOR_PREFETCH))
> -                               continue;
> -                       oid_array_append(&to_fetch, &ce->oid);
> -               }
> -               promisor_remote_get_direct(the_repository,
> -                                          to_fetch.oid, to_fetch.nr);
> -               oid_array_clear(&to_fetch);
> -       }
> +               prefetch_cache_entries(index, must_checkout);
>
>         get_parallel_checkout_configs(&pc_workers, &pc_threshold);
>
> @@ -473,7 +462,7 @@ static int check_updates(struct unpack_trees_options *o,
>         for (i = 0; i < index->cache_nr; i++) {
>                 struct cache_entry *ce = index->cache[i];
>
> -               if (ce->ce_flags & CE_UPDATE) {
> +               if (must_checkout(ce)) {
>                         size_t last_pc_queue_size = pc_queue_size();
>
>                         if (ce->ce_flags & CE_WT_REMOVE)
> --
> 2.32.0.432.gabb21c7263-goog

This might have been slightly easier to review if you had introduced
must_checkout() first, and then made the new prefetch_cache_entries()
in a separate commit, so that comparing old and new code I didn't have
to try to deduce the separate steps.  However, that's a really minor
point since must_checkout() is so small.

Anyway, this patch looks good to me.
diff mbox series

Patch

diff --git a/cache.h b/cache.h
index ba04ff8bd3..6f952e22c6 100644
--- a/cache.h
+++ b/cache.h
@@ -410,6 +410,15 @@  struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s
  */
 void validate_cache_entries(const struct index_state *istate);
 
+/*
+ * Bulk prefetch all missing cache entries that are not GITLINKs and that match
+ * the given predicate. This function should only be called if
+ * has_promisor_remote() returns true.
+ */
+typedef int (*must_prefetch_predicate)(const struct cache_entry *);
+void prefetch_cache_entries(const struct index_state *istate,
+			    must_prefetch_predicate must_prefetch);
+
 #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS
 extern struct index_state the_index;
 
diff --git a/read-cache.c b/read-cache.c
index ba2b012a6c..4e396bf17f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -27,6 +27,7 @@ 
 #include "progress.h"
 #include "sparse-index.h"
 #include "csum-file.h"
+#include "promisor-remote.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -3657,3 +3658,25 @@  static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta
 		strbuf_add(sb, &buffer, sizeof(uint32_t));
 	}
 }
+
+void prefetch_cache_entries(const struct index_state *istate,
+			    must_prefetch_predicate must_prefetch)
+{
+	int i;
+	struct oid_array to_fetch = OID_ARRAY_INIT;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+
+		if (S_ISGITLINK(ce->ce_mode) || !must_prefetch(ce))
+			continue;
+		if (!oid_object_info_extended(the_repository, &ce->oid,
+					      NULL,
+					      OBJECT_INFO_FOR_PREFETCH))
+			continue;
+		oid_array_append(&to_fetch, &ce->oid);
+	}
+	promisor_remote_get_direct(the_repository,
+				   to_fetch.oid, to_fetch.nr);
+	oid_array_clear(&to_fetch);
+}
diff --git a/unpack-trees.c b/unpack-trees.c
index f88a69f8e7..ed92794032 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -392,6 +392,11 @@  static void report_collided_checkout(struct index_state *index)
 	string_list_clear(&list, 0);
 }
 
+static int must_checkout(const struct cache_entry *ce)
+{
+	return ce->ce_flags & CE_UPDATE;
+}
+
 static int check_updates(struct unpack_trees_options *o,
 			 struct index_state *index)
 {
@@ -442,28 +447,12 @@  static int check_updates(struct unpack_trees_options *o,
 	if (should_update_submodules())
 		load_gitmodules_file(index, &state);
 
-	if (has_promisor_remote()) {
+	if (has_promisor_remote())
 		/*
 		 * Prefetch the objects that are to be checked out in the loop
 		 * below.
 		 */
-		struct oid_array to_fetch = OID_ARRAY_INIT;
-		for (i = 0; i < index->cache_nr; i++) {
-			struct cache_entry *ce = index->cache[i];
-
-			if (!(ce->ce_flags & CE_UPDATE) ||
-			    S_ISGITLINK(ce->ce_mode))
-				continue;
-			if (!oid_object_info_extended(the_repository, &ce->oid,
-						      NULL,
-						      OBJECT_INFO_FOR_PREFETCH))
-				continue;
-			oid_array_append(&to_fetch, &ce->oid);
-		}
-		promisor_remote_get_direct(the_repository,
-					   to_fetch.oid, to_fetch.nr);
-		oid_array_clear(&to_fetch);
-	}
+		prefetch_cache_entries(index, must_checkout);
 
 	get_parallel_checkout_configs(&pc_workers, &pc_threshold);
 
@@ -473,7 +462,7 @@  static int check_updates(struct unpack_trees_options *o,
 	for (i = 0; i < index->cache_nr; i++) {
 		struct cache_entry *ce = index->cache[i];
 
-		if (ce->ce_flags & CE_UPDATE) {
+		if (must_checkout(ce)) {
 			size_t last_pc_queue_size = pc_queue_size();
 
 			if (ce->ce_flags & CE_WT_REMOVE)