diff mbox series

[v2,1/3] refs: keep track of unresolved reference value in iterators

Message ID ac0957c9e6abdc2597900573703461833e9c9d69.1722524334.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series keep track of unresolved value of symbolic-ref in ref iterators | expand

Commit Message

John Cai Aug. 1, 2024, 2:58 p.m. UTC
From: John Cai <johncai86@gmail.com>

Since ref iterators do not hold onto the direct value of a reference
without resolving it, the only way to get ahold of a direct value of a
symbolic ref is to make a separate call to refs_read_symbolic_ref.

To make accessing the direct value of a symbolic ref more efficient,
let's save the direct value of the ref in the iterators for both the
files backend and the reftable backend.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 refs/files-backend.c    | 14 ++++++++++----
 refs/iterator.c         |  3 +++
 refs/ref-cache.c        |  8 ++++++++
 refs/ref-cache.h        |  2 ++
 refs/refs-internal.h    |  1 +
 refs/reftable-backend.c |  8 ++++++--
 6 files changed, 30 insertions(+), 6 deletions(-)

Comments

Junio C Hamano Aug. 1, 2024, 4:41 p.m. UTC | #1
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -245,9 +245,11 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs,
>  {
>  	struct object_id oid;
>  	int flag;
> -
> -	if (!refs_resolve_ref_unsafe(&refs->base, refname, RESOLVE_REF_READING,
> -				     &oid, &flag)) {
> +	const char* referent = refs_resolve_ref_unsafe(&refs->base,

Style.  The asterisk sticks to the pointer variable, not the base type.

> +						       refname,
> +						       RESOLVE_REF_READING,
> +						       &oid, &flag);
> +	if (!referent) {
>  		oidclr(&oid, the_repository->hash_algo);
>  		flag |= REF_ISBROKEN;
>  	} else if (is_null_oid(&oid)) {
> @@ -268,7 +270,8 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs,
>  		oidclr(&oid, the_repository->hash_algo);
>  		flag |= REF_BAD_NAME | REF_ISBROKEN;
>  	}
> -	add_entry_to_dir(dir, create_ref_entry(refname, &oid, flag));
> +
> +	add_entry_to_dir(dir, create_ref_entry(refname, referent, &oid, flag));
>  }

This is a very straight-forward change, given the matching change to
the ref-entry, which now has a referent member.

> @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  		iter->base.refname = iter->iter0->refname;
>  		iter->base.oid = iter->iter0->oid;
>  		iter->base.flags = iter->iter0->flags;
> +		if (iter->iter0->flags & REF_ISSYMREF)
> +			iter->base.referent = iter->iter0->referent;

Presumably base.referent is initialized to NULL so this "if"
statement does not need an else clause?  

I am primarily wondering why this needs to be conditional.  We are
making verbatim copy of the flags word from *iter->iter0 to
iter->base; if iter0 is symref we want to mark base also as symref,
If iter0 is not a symref, then we want to mark base also not a
symref, presumably.  So wouldn't it make sense to just say

		iter->base.referent = iter->iter0->referent;

here, regardless of what iter->iter0->flags say about symref-ness of
the thing?  Because ...

> diff --git a/refs/iterator.c b/refs/iterator.c
> index d355ebf0d59..26acb6bd640 100644
> --- a/refs/iterator.c
> +++ b/refs/iterator.c
> @@ -7,6 +7,7 @@
>  #include "refs.h"
>  #include "refs/refs-internal.h"
>  #include "iterator.h"
> +#include "strbuf.h"
>  
>  int ref_iterator_advance(struct ref_iterator *ref_iterator)
>  {
> @@ -29,6 +30,7 @@ void base_ref_iterator_init(struct ref_iterator *iter,
>  {
>  	iter->vtable = vtable;
>  	iter->refname = NULL;
> +	iter->referent = NULL;
>  	iter->oid = NULL;
>  	iter->flags = 0;
>  }
> @@ -199,6 +201,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  		}
>  
>  		if (selection & ITER_YIELD_CURRENT) {
> +			iter->base.referent = (*iter->current)->referent;
>  			iter->base.refname = (*iter->current)->refname;
>  			iter->base.oid = (*iter->current)->oid;
>  			iter->base.flags = (*iter->current)->flags;

... other parts of the API seem to follow that philosophy.

In other words, the invariant of this code is that .referent is
non-NULL if and only if the ref is a symref, and that invariant is
trusted without checking with .flags member.  But the earlier hunk
that copied iter0 to base did not seem to be using that invariant,
which made it look inconsistent.

>  struct ref_entry *create_ref_entry(const char *refname,
> +				   const char *referent,
>  				   const struct object_id *oid, int flag)
>  {
>  	struct ref_entry *ref;
> @@ -41,6 +43,10 @@ struct ref_entry *create_ref_entry(const char *refname,
>  	FLEX_ALLOC_STR(ref, name, refname);
>  	oidcpy(&ref->u.value.oid, oid);
>  	ref->flag = flag;
> +
> +	if (flag & REF_ISSYMREF)
> +		ref->u.value.referent = xstrdup_or_null(referent);

OK.  ref_value now has referent next to the existing oid, but that
member only makes sense when flag says it is a symref.

Curiously, that information is missing from ref_value struct, so by
looking at a ref_value alone, we cannot tell if we should trust the
value in the .referent member?

Makes me wonder if we should follow the same "ignore what the flag
says when filling the .referent member; if the ref is not a symref,
the referent variable is NULL, and if it is, referent is never NULL"
pattern?  Then ref->u.value.referent is _always_ defined---the
current code says "the u.value.referent member is undefined for ref
that is not a symref", but with the suggested change, it will be
"the u.value.referent member is NULL for ref that is not a symref,
and for a symref, it is the value of the symref".

>  	return ref;
>  }
>  
> @@ -66,6 +72,7 @@ static void free_ref_entry(struct ref_entry *entry)
>  		 */
>  		clear_ref_dir(&entry->u.subdir);
>  	}
> +	free(entry->u.value.referent);

And that would match what is done here.  We do not say "entry->flag
says it is not a symref, so do not bother freeing u.value.referent".

> @@ -431,6 +438,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  			level->index = -1;
>  		} else {
>  			iter->base.refname = entry->name;
> +			iter->base.referent = entry->u.value.referent;
>  			iter->base.oid = &entry->u.value.oid;
>  			iter->base.flags = entry->flag;
>  			return ITER_OK;
> diff --git a/refs/ref-cache.h b/refs/ref-cache.h
> index 31ebe24f6cf..5f04e518c37 100644
> --- a/refs/ref-cache.h
> +++ b/refs/ref-cache.h
> @@ -42,6 +42,7 @@ struct ref_value {
>  	 * referred to by the last reference in the symlink chain.
>  	 */
>  	struct object_id oid;
> +	char *referent;
>  };
>  
>  /*
> @@ -173,6 +174,7 @@ struct ref_entry *create_dir_entry(struct ref_cache *cache,
>  				   const char *dirname, size_t len);
>  
>  struct ref_entry *create_ref_entry(const char *refname,
> +				   const char *referent,
>  				   const struct object_id *oid, int flag);
>  
>  /*
> diff --git a/refs/refs-internal.h b/refs/refs-internal.h
> index fa975d69aaa..117ec233848 100644
> --- a/refs/refs-internal.h
> +++ b/refs/refs-internal.h
> @@ -299,6 +299,7 @@ enum do_for_each_ref_flags {
>  struct ref_iterator {
>  	struct ref_iterator_vtable *vtable;
>  	const char *refname;
> +	const char *referent;
>  	const struct object_id *oid;
>  	unsigned int flags;
>  };
> diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> index fbe74c239d3..9f724c3d632 100644
> --- a/refs/reftable-backend.c
> +++ b/refs/reftable-backend.c
> @@ -494,8 +494,12 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  				the_repository->hash_algo);
>  			break;
>  		case REFTABLE_REF_SYMREF:
> -			if (!refs_resolve_ref_unsafe(&iter->refs->base, iter->ref.refname,
> -						     RESOLVE_REF_READING, &iter->oid, &flags))
> +			iter->base.referent = refs_resolve_ref_unsafe(&iter->refs->base,
> +								      iter->ref.refname,
> +								      RESOLVE_REF_READING,
> +								      &iter->oid,
> +								      &flags);
> +			if (!iter->base.referent)
>  				oidclr(&iter->oid, the_repository->hash_algo);
>  			break;
>  		default:
Patrick Steinhardt Aug. 5, 2024, 10:59 a.m. UTC | #2
On Thu, Aug 01, 2024 at 09:41:03AM -0700, Junio C Hamano wrote:
> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:
> > @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator)
> >  		iter->base.refname = iter->iter0->refname;
> >  		iter->base.oid = iter->iter0->oid;
> >  		iter->base.flags = iter->iter0->flags;
> > +		if (iter->iter0->flags & REF_ISSYMREF)
> > +			iter->base.referent = iter->iter0->referent;
> 
> Presumably base.referent is initialized to NULL so this "if"
> statement does not need an else clause?

This function typically ends up being called in a loop though. So
without the else clause, wouldn't we potentially leak the value of a
preceding ref into subsequent iterations like this?

> I am primarily wondering why this needs to be conditional.  We are
> making verbatim copy of the flags word from *iter->iter0 to
> iter->base; if iter0 is symref we want to mark base also as symref,
> If iter0 is not a symref, then we want to mark base also not a
> symref, presumably.  So wouldn't it make sense to just say
> 
> 		iter->base.referent = iter->iter0->referent;
> 
> here, regardless of what iter->iter0->flags say about symref-ness of
> the thing?  Because ...

> > diff --git a/refs/iterator.c b/refs/iterator.c
> > index d355ebf0d59..26acb6bd640 100644
> > --- a/refs/iterator.c
> > +++ b/refs/iterator.c
> > @@ -7,6 +7,7 @@
> >  #include "refs.h"
> >  #include "refs/refs-internal.h"
> >  #include "iterator.h"
> > +#include "strbuf.h"
> >  
> >  int ref_iterator_advance(struct ref_iterator *ref_iterator)
> >  {
> > @@ -29,6 +30,7 @@ void base_ref_iterator_init(struct ref_iterator *iter,
> >  {
> >  	iter->vtable = vtable;
> >  	iter->refname = NULL;
> > +	iter->referent = NULL;
> >  	iter->oid = NULL;
> >  	iter->flags = 0;
> >  }
> > @@ -199,6 +201,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator)
> >  		}
> >  
> >  		if (selection & ITER_YIELD_CURRENT) {
> > +			iter->base.referent = (*iter->current)->referent;
> >  			iter->base.refname = (*iter->current)->refname;
> >  			iter->base.oid = (*iter->current)->oid;
> >  			iter->base.flags = (*iter->current)->flags;
> 
> ... other parts of the API seem to follow that philosophy.
> 
> In other words, the invariant of this code is that .referent is
> non-NULL if and only if the ref is a symref, and that invariant is
> trusted without checking with .flags member.  But the earlier hunk
> that copied iter0 to base did not seem to be using that invariant,
> which made it look inconsistent.

Agreed.

> >  struct ref_entry *create_ref_entry(const char *refname,
> > +				   const char *referent,
> >  				   const struct object_id *oid, int flag)
> >  {
> >  	struct ref_entry *ref;
> > @@ -41,6 +43,10 @@ struct ref_entry *create_ref_entry(const char *refname,
> >  	FLEX_ALLOC_STR(ref, name, refname);
> >  	oidcpy(&ref->u.value.oid, oid);
> >  	ref->flag = flag;
> > +
> > +	if (flag & REF_ISSYMREF)
> > +		ref->u.value.referent = xstrdup_or_null(referent);
> 
> OK.  ref_value now has referent next to the existing oid, but that
> member only makes sense when flag says it is a symref.
> 
> Curiously, that information is missing from ref_value struct, so by
> looking at a ref_value alone, we cannot tell if we should trust the
> value in the .referent member?
> 
> Makes me wonder if we should follow the same "ignore what the flag
> says when filling the .referent member; if the ref is not a symref,
> the referent variable is NULL, and if it is, referent is never NULL"
> pattern?  Then ref->u.value.referent is _always_ defined---the
> current code says "the u.value.referent member is undefined for ref
> that is not a symref", but with the suggested change, it will be
> "the u.value.referent member is NULL for ref that is not a symref,
> and for a symref, it is the value of the symref".

Yeah, I think that would be preferable indeed.

Patrick
Junio C Hamano Aug. 5, 2024, 3:40 p.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

> On Thu, Aug 01, 2024 at 09:41:03AM -0700, Junio C Hamano wrote:
>> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:
>> > @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator)
>> >  		iter->base.refname = iter->iter0->refname;
>> >  		iter->base.oid = iter->iter0->oid;
>> >  		iter->base.flags = iter->iter0->flags;
>> > +		if (iter->iter0->flags & REF_ISSYMREF)
>> > +			iter->base.referent = iter->iter0->referent;
>> 
>> Presumably base.referent is initialized to NULL so this "if"
>> statement does not need an else clause?
>
> This function typically ends up being called in a loop though. So
> without the else clause, wouldn't we potentially leak the value of a
> preceding ref into subsequent iterations like this?

OK, so this does need to clear it when we tell the caller we have
non SYMREF, as we do want to show NULL as base.referent to the
caller in such a case.  Thanks.

It does reinforce my larger point, which was:

>> Makes me wonder if we should follow the same "ignore what the flag
>> says when filling the .referent member; if the ref is not a symref,
>> the referent variable is NULL, and if it is, referent is never NULL"
>> pattern?  Then ref->u.value.referent is _always_ defined---the
>> current code says "the u.value.referent member is undefined for ref
>> that is not a symref", but with the suggested change, it will be
>> "the u.value.referent member is NULL for ref that is not a symref,
>> and for a symref, it is the value of the symref".
>
> Yeah, I think that would be preferable indeed.

In other words, with .referent member introduced, checking for
(.flags & REF_ISSYMREF) becomes a redundant&duplicated bit of
information, as the bit should exactly match the non-NULL ness of
the .referent member.

Thanks.
diff mbox series

Patch

diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c7..7640318cba8 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -245,9 +245,11 @@  static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs,
 {
 	struct object_id oid;
 	int flag;
-
-	if (!refs_resolve_ref_unsafe(&refs->base, refname, RESOLVE_REF_READING,
-				     &oid, &flag)) {
+	const char* referent = refs_resolve_ref_unsafe(&refs->base,
+						       refname,
+						       RESOLVE_REF_READING,
+						       &oid, &flag);
+	if (!referent) {
 		oidclr(&oid, the_repository->hash_algo);
 		flag |= REF_ISBROKEN;
 	} else if (is_null_oid(&oid)) {
@@ -268,7 +270,8 @@  static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs,
 		oidclr(&oid, the_repository->hash_algo);
 		flag |= REF_BAD_NAME | REF_ISBROKEN;
 	}
-	add_entry_to_dir(dir, create_ref_entry(refname, &oid, flag));
+
+	add_entry_to_dir(dir, create_ref_entry(refname, referent, &oid, flag));
 }
 
 /*
@@ -886,6 +889,9 @@  static int files_ref_iterator_advance(struct ref_iterator *ref_iterator)
 		iter->base.refname = iter->iter0->refname;
 		iter->base.oid = iter->iter0->oid;
 		iter->base.flags = iter->iter0->flags;
+		if (iter->iter0->flags & REF_ISSYMREF)
+			iter->base.referent = iter->iter0->referent;
+
 		return ITER_OK;
 	}
 
diff --git a/refs/iterator.c b/refs/iterator.c
index d355ebf0d59..26acb6bd640 100644
--- a/refs/iterator.c
+++ b/refs/iterator.c
@@ -7,6 +7,7 @@ 
 #include "refs.h"
 #include "refs/refs-internal.h"
 #include "iterator.h"
+#include "strbuf.h"
 
 int ref_iterator_advance(struct ref_iterator *ref_iterator)
 {
@@ -29,6 +30,7 @@  void base_ref_iterator_init(struct ref_iterator *iter,
 {
 	iter->vtable = vtable;
 	iter->refname = NULL;
+	iter->referent = NULL;
 	iter->oid = NULL;
 	iter->flags = 0;
 }
@@ -199,6 +201,7 @@  static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator)
 		}
 
 		if (selection & ITER_YIELD_CURRENT) {
+			iter->base.referent = (*iter->current)->referent;
 			iter->base.refname = (*iter->current)->refname;
 			iter->base.oid = (*iter->current)->oid;
 			iter->base.flags = (*iter->current)->flags;
diff --git a/refs/ref-cache.c b/refs/ref-cache.c
index 4ce519bbc85..da946d476cc 100644
--- a/refs/ref-cache.c
+++ b/refs/ref-cache.c
@@ -5,6 +5,7 @@ 
 #include "refs-internal.h"
 #include "ref-cache.h"
 #include "../iterator.h"
+#include "../strbuf.h"
 
 void add_entry_to_dir(struct ref_dir *dir, struct ref_entry *entry)
 {
@@ -34,6 +35,7 @@  struct ref_dir *get_ref_dir(struct ref_entry *entry)
 }
 
 struct ref_entry *create_ref_entry(const char *refname,
+				   const char *referent,
 				   const struct object_id *oid, int flag)
 {
 	struct ref_entry *ref;
@@ -41,6 +43,10 @@  struct ref_entry *create_ref_entry(const char *refname,
 	FLEX_ALLOC_STR(ref, name, refname);
 	oidcpy(&ref->u.value.oid, oid);
 	ref->flag = flag;
+
+	if (flag & REF_ISSYMREF)
+		ref->u.value.referent = xstrdup_or_null(referent);
+
 	return ref;
 }
 
@@ -66,6 +72,7 @@  static void free_ref_entry(struct ref_entry *entry)
 		 */
 		clear_ref_dir(&entry->u.subdir);
 	}
+	free(entry->u.value.referent);
 	free(entry);
 }
 
@@ -431,6 +438,7 @@  static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator)
 			level->index = -1;
 		} else {
 			iter->base.refname = entry->name;
+			iter->base.referent = entry->u.value.referent;
 			iter->base.oid = &entry->u.value.oid;
 			iter->base.flags = entry->flag;
 			return ITER_OK;
diff --git a/refs/ref-cache.h b/refs/ref-cache.h
index 31ebe24f6cf..5f04e518c37 100644
--- a/refs/ref-cache.h
+++ b/refs/ref-cache.h
@@ -42,6 +42,7 @@  struct ref_value {
 	 * referred to by the last reference in the symlink chain.
 	 */
 	struct object_id oid;
+	char *referent;
 };
 
 /*
@@ -173,6 +174,7 @@  struct ref_entry *create_dir_entry(struct ref_cache *cache,
 				   const char *dirname, size_t len);
 
 struct ref_entry *create_ref_entry(const char *refname,
+				   const char *referent,
 				   const struct object_id *oid, int flag);
 
 /*
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aaa..117ec233848 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -299,6 +299,7 @@  enum do_for_each_ref_flags {
 struct ref_iterator {
 	struct ref_iterator_vtable *vtable;
 	const char *refname;
+	const char *referent;
 	const struct object_id *oid;
 	unsigned int flags;
 };
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d3..9f724c3d632 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -494,8 +494,12 @@  static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
 				the_repository->hash_algo);
 			break;
 		case REFTABLE_REF_SYMREF:
-			if (!refs_resolve_ref_unsafe(&iter->refs->base, iter->ref.refname,
-						     RESOLVE_REF_READING, &iter->oid, &flags))
+			iter->base.referent = refs_resolve_ref_unsafe(&iter->refs->base,
+								      iter->ref.refname,
+								      RESOLVE_REF_READING,
+								      &iter->oid,
+								      &flags);
+			if (!iter->base.referent)
 				oidclr(&iter->oid, the_repository->hash_algo);
 			break;
 		default: