diff mbox series

[1/2] packfile: split promisor objects oidset into two

Message ID 20240919234741.1317946-2-calvinwan@google.com (mailing list archive)
State New
Headers show
Series [1/2] packfile: split promisor objects oidset into two | expand

Commit Message

Calvin Wan Sept. 19, 2024, 11:47 p.m. UTC
From: Han Young <hanyang.tony@bytedance.com>

split promisor objects oidset into two, one is objects in promisor packfile,
and other set is objects referenced in promisor packfile. This enable us to
check if an object is in promisor packfile.

Signed-off-by: Han Young <hanyang.tony@bytedance.com>
---
 packfile.c | 24 +++++++++++++++---------
 packfile.h |  7 ++++++-
 2 files changed, 21 insertions(+), 10 deletions(-)

Comments

Junio C Hamano Sept. 22, 2024, 6:37 a.m. UTC | #1
Calvin Wan <calvinwan@google.com> writes:

> From: Han Young <hanyang.tony@bytedance.com>
>
> split promisor objects oidset into two, one is objects in promisor packfile,
> and other set is objects referenced in promisor packfile. This enable us to
> check if an object is in promisor packfile.

OK, so the idea is that we can discard the objects that are _in_ a
promisor packfile and assume that we can fetch them back?

Objects that are referenced by objects in the promisor packfile may
or may not be in the same packfile, and we obviously cannot expect
that we can refetch those that are not in the promisor packfile from
the promisor.  So what is the other list for?

What I am wondering is what good the existing helper function
is_promisor_object() is for.  It will say "yes" for objects that we
may have obtained from the promisor remote (hence we can lazily
fetch them again even if we lost them) and in promisor packs, but it
may also say "yes" for any object that an object that is in a
promisor pack (e.g., a tree object that represents a subdirectory
that was not modified by a commit in a promisor pack, a parent
commit of a commit in a promisor pack, etc.).  In other words, are
the callers getting any useful answer to their question to the
helper function, or are they all buggy for not asking "is this
object in a promisor pack" and allowing the helper to say "yes" for
objects that are merely referenced by an object in promisor packs?

Thanks.


> -int is_promisor_object(const struct object_id *oid)
> +int is_in_promisor_pack(const struct object_id *oid, int referenced)
>  {
> -	static struct oidset promisor_objects;
> +	static struct promisor_objects promisor_objects;
>  	static int promisor_objects_prepared;
>  
>  	if (!promisor_objects_prepared) {
> @@ -2303,5 +2308,6 @@ int is_promisor_object(const struct object_id *oid)
>  		}
>  		promisor_objects_prepared = 1;
>  	}
> -	return oidset_contains(&promisor_objects, oid);
> +	return oidset_contains(&promisor_objects.promisor_pack_objects, oid) ||
> +		(referenced && oidset_contains(&promisor_objects.promisor_pack_referenced_objects, oid));
>  }
> diff --git a/packfile.h b/packfile.h
> index 0f78658229..13a349e223 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -195,11 +195,16 @@ int has_object_kept_pack(const struct object_id *oid, unsigned flags);
>  
>  int has_pack_index(const unsigned char *sha1);
>  
> +int is_in_promisor_pack(const struct object_id *oid, int referenced);
> +
>  /*
>   * Return 1 if an object in a promisor packfile is or refers to the given
>   * object, 0 otherwise.
>   */
> -int is_promisor_object(const struct object_id *oid);
> +static inline int is_promisor_object(const struct object_id *oid)
> +{
> +	return is_in_promisor_pack(oid, 1);
> +}
>  
>  /*
>   * Expose a function for fuzz testing.
diff mbox series

Patch

diff --git a/packfile.c b/packfile.c
index cf12a539ea..3ff191b2e7 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2234,12 +2234,17 @@  int for_each_packed_object(each_packed_object_fn cb, void *data,
 	return r ? r : pack_errors;
 }
 
+struct promisor_objects {
+	struct oidset promisor_pack_objects;
+	struct oidset promisor_pack_referenced_objects;
+};
+
 static int add_promisor_object(const struct object_id *oid,
 			       struct packed_git *pack UNUSED,
 			       uint32_t pos UNUSED,
 			       void *set_)
 {
-	struct oidset *set = set_;
+	struct promisor_objects *set = set_;
 	struct object *obj;
 	int we_parsed_object;
 
@@ -2254,7 +2259,7 @@  static int add_promisor_object(const struct object_id *oid,
 	if (!obj)
 		return 1;
 
-	oidset_insert(set, oid);
+	oidset_insert(&set->promisor_pack_objects, oid);
 
 	/*
 	 * If this is a tree, commit, or tag, the objects it refers
@@ -2272,26 +2277,26 @@  static int add_promisor_object(const struct object_id *oid,
 			 */
 			return 0;
 		while (tree_entry_gently(&desc, &entry))
-			oidset_insert(set, &entry.oid);
+			oidset_insert(&set->promisor_pack_referenced_objects, &entry.oid);
 		if (we_parsed_object)
 			free_tree_buffer(tree);
 	} else if (obj->type == OBJ_COMMIT) {
 		struct commit *commit = (struct commit *) obj;
 		struct commit_list *parents = commit->parents;
 
-		oidset_insert(set, get_commit_tree_oid(commit));
+		oidset_insert(&set->promisor_pack_referenced_objects, get_commit_tree_oid(commit));
 		for (; parents; parents = parents->next)
-			oidset_insert(set, &parents->item->object.oid);
+			oidset_insert(&set->promisor_pack_referenced_objects, &parents->item->object.oid);
 	} else if (obj->type == OBJ_TAG) {
 		struct tag *tag = (struct tag *) obj;
-		oidset_insert(set, get_tagged_oid(tag));
+		oidset_insert(&set->promisor_pack_referenced_objects, get_tagged_oid(tag));
 	}
 	return 0;
 }
 
-int is_promisor_object(const struct object_id *oid)
+int is_in_promisor_pack(const struct object_id *oid, int referenced)
 {
-	static struct oidset promisor_objects;
+	static struct promisor_objects promisor_objects;
 	static int promisor_objects_prepared;
 
 	if (!promisor_objects_prepared) {
@@ -2303,5 +2308,6 @@  int is_promisor_object(const struct object_id *oid)
 		}
 		promisor_objects_prepared = 1;
 	}
-	return oidset_contains(&promisor_objects, oid);
+	return oidset_contains(&promisor_objects.promisor_pack_objects, oid) ||
+		(referenced && oidset_contains(&promisor_objects.promisor_pack_referenced_objects, oid));
 }
diff --git a/packfile.h b/packfile.h
index 0f78658229..13a349e223 100644
--- a/packfile.h
+++ b/packfile.h
@@ -195,11 +195,16 @@  int has_object_kept_pack(const struct object_id *oid, unsigned flags);
 
 int has_pack_index(const unsigned char *sha1);
 
+int is_in_promisor_pack(const struct object_id *oid, int referenced);
+
 /*
  * Return 1 if an object in a promisor packfile is or refers to the given
  * object, 0 otherwise.
  */
-int is_promisor_object(const struct object_id *oid);
+static inline int is_promisor_object(const struct object_id *oid)
+{
+	return is_in_promisor_pack(oid, 1);
+}
 
 /*
  * Expose a function for fuzz testing.