Message ID | ac0957c9e6abdc2597900573703461833e9c9d69.1722524334.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | keep track of unresolved value of symbolic-ref in ref iterators | expand |
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes: > @@ -245,9 +245,11 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs, > { > struct object_id oid; > int flag; > - > - if (!refs_resolve_ref_unsafe(&refs->base, refname, RESOLVE_REF_READING, > - &oid, &flag)) { > + const char* referent = refs_resolve_ref_unsafe(&refs->base, Style. The asterisk sticks to the pointer variable, not the base type. > + refname, > + RESOLVE_REF_READING, > + &oid, &flag); > + if (!referent) { > oidclr(&oid, the_repository->hash_algo); > flag |= REF_ISBROKEN; > } else if (is_null_oid(&oid)) { > @@ -268,7 +270,8 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs, > oidclr(&oid, the_repository->hash_algo); > flag |= REF_BAD_NAME | REF_ISBROKEN; > } > - add_entry_to_dir(dir, create_ref_entry(refname, &oid, flag)); > + > + add_entry_to_dir(dir, create_ref_entry(refname, referent, &oid, flag)); > } This is a very straight-forward change, given the matching change to the ref-entry, which now has a referent member. > @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) > iter->base.refname = iter->iter0->refname; > iter->base.oid = iter->iter0->oid; > iter->base.flags = iter->iter0->flags; > + if (iter->iter0->flags & REF_ISSYMREF) > + iter->base.referent = iter->iter0->referent; Presumably base.referent is initialized to NULL so this "if" statement does not need an else clause? I am primarily wondering why this needs to be conditional. We are making verbatim copy of the flags word from *iter->iter0 to iter->base; if iter0 is symref we want to mark base also as symref, If iter0 is not a symref, then we want to mark base also not a symref, presumably. So wouldn't it make sense to just say iter->base.referent = iter->iter0->referent; here, regardless of what iter->iter0->flags say about symref-ness of the thing? Because ... 
> diff --git a/refs/iterator.c b/refs/iterator.c > index d355ebf0d59..26acb6bd640 100644 > --- a/refs/iterator.c > +++ b/refs/iterator.c > @@ -7,6 +7,7 @@ > #include "refs.h" > #include "refs/refs-internal.h" > #include "iterator.h" > +#include "strbuf.h" > > int ref_iterator_advance(struct ref_iterator *ref_iterator) > { > @@ -29,6 +30,7 @@ void base_ref_iterator_init(struct ref_iterator *iter, > { > iter->vtable = vtable; > iter->refname = NULL; > + iter->referent = NULL; > iter->oid = NULL; > iter->flags = 0; > } > @@ -199,6 +201,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) > } > > if (selection & ITER_YIELD_CURRENT) { > + iter->base.referent = (*iter->current)->referent; > iter->base.refname = (*iter->current)->refname; > iter->base.oid = (*iter->current)->oid; > iter->base.flags = (*iter->current)->flags; ... other parts of the API seem to follow that philosophy. In other words, the invariant of this code is that .referent is non-NULL if and only if the ref is a symref, and that invariant is trusted without checking with .flags member. But the earlier hunk that copied iter0 to base did not seem to be using that invariant, which made it look inconsistent. > struct ref_entry *create_ref_entry(const char *refname, > + const char *referent, > const struct object_id *oid, int flag) > { > struct ref_entry *ref; > @@ -41,6 +43,10 @@ struct ref_entry *create_ref_entry(const char *refname, > FLEX_ALLOC_STR(ref, name, refname); > oidcpy(&ref->u.value.oid, oid); > ref->flag = flag; > + > + if (flag & REF_ISSYMREF) > + ref->u.value.referent = xstrdup_or_null(referent); OK. ref_value now has referent next to the existing oid, but that member only makes sense when flag says it is a symref. Curiously, that information is missing from ref_value struct, so by looking at a ref_value alone, we cannot tell if we should trust the value in the .referent member? 
Makes me wonder if we should follow the same "ignore what the flag says when filling the .referent member; if the ref is not a symref, the referent variable is NULL, and if it is, referent is never NULL" pattern? Then ref->u.value.referent is _always_ defined---the current code says "the u.value.referent member is undefined for ref that is not a symref", but with the suggested change, it will be "the u.value.referent member is NULL for ref that is not a symref, and for a symref, it is the value of the symref". > return ref; > } > > @@ -66,6 +72,7 @@ static void free_ref_entry(struct ref_entry *entry) > */ > clear_ref_dir(&entry->u.subdir); > } > + free(entry->u.value.referent); And that would match what is done here. We do not say "entry->flag says it is not a symref, so do not bother freeing u.value.referent". > @@ -431,6 +438,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) > level->index = -1; > } else { > iter->base.refname = entry->name; > + iter->base.referent = entry->u.value.referent; > iter->base.oid = &entry->u.value.oid; > iter->base.flags = entry->flag; > return ITER_OK; > diff --git a/refs/ref-cache.h b/refs/ref-cache.h > index 31ebe24f6cf..5f04e518c37 100644 > --- a/refs/ref-cache.h > +++ b/refs/ref-cache.h > @@ -42,6 +42,7 @@ struct ref_value { > * referred to by the last reference in the symlink chain. 
> */ > struct object_id oid; > + char *referent; > }; > > /* > @@ -173,6 +174,7 @@ struct ref_entry *create_dir_entry(struct ref_cache *cache, > const char *dirname, size_t len); > > struct ref_entry *create_ref_entry(const char *refname, > + const char *referent, > const struct object_id *oid, int flag); > > /* > diff --git a/refs/refs-internal.h b/refs/refs-internal.h > index fa975d69aaa..117ec233848 100644 > --- a/refs/refs-internal.h > +++ b/refs/refs-internal.h > @@ -299,6 +299,7 @@ enum do_for_each_ref_flags { > struct ref_iterator { > struct ref_iterator_vtable *vtable; > const char *refname; > + const char *referent; > const struct object_id *oid; > unsigned int flags; > }; > diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c > index fbe74c239d3..9f724c3d632 100644 > --- a/refs/reftable-backend.c > +++ b/refs/reftable-backend.c > @@ -494,8 +494,12 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) > the_repository->hash_algo); > break; > case REFTABLE_REF_SYMREF: > - if (!refs_resolve_ref_unsafe(&iter->refs->base, iter->ref.refname, > - RESOLVE_REF_READING, &iter->oid, &flags)) > + iter->base.referent = refs_resolve_ref_unsafe(&iter->refs->base, > + iter->ref.refname, > + RESOLVE_REF_READING, > + &iter->oid, > + &flags); > + if (!iter->base.referent) > oidclr(&iter->oid, the_repository->hash_algo); > break; > default:
On Thu, Aug 01, 2024 at 09:41:03AM -0700, Junio C Hamano wrote: > "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes: > > @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) > > iter->base.refname = iter->iter0->refname; > > iter->base.oid = iter->iter0->oid; > > iter->base.flags = iter->iter0->flags; > > + if (iter->iter0->flags & REF_ISSYMREF) > > + iter->base.referent = iter->iter0->referent; > > Presumably base.referent is initialized to NULL so this "if" > statement does not need an else clause? This function typically ends up being called in a loop though. So without the else clause, wouldn't we potentially leak the value of a preceding ref into subsequent iterations like this? > I am primarily wondering why this needs to be conditional. We are > making verbatim copy of the flags word from *iter->iter0 to > iter->base; if iter0 is symref we want to mark base also as symref, > If iter0 is not a symref, then we want to mark base also not a > symref, presumably. So wouldn't it make sense to just say > > iter->base.referent = iter->iter0->referent; > > here, regardless of what iter->iter0->flags say about symref-ness of > the thing? Because ... 
> > diff --git a/refs/iterator.c b/refs/iterator.c > > index d355ebf0d59..26acb6bd640 100644 > > --- a/refs/iterator.c > > +++ b/refs/iterator.c > > @@ -7,6 +7,7 @@ > > #include "refs.h" > > #include "refs/refs-internal.h" > > #include "iterator.h" > > +#include "strbuf.h" > > > > int ref_iterator_advance(struct ref_iterator *ref_iterator) > > { > > @@ -29,6 +30,7 @@ void base_ref_iterator_init(struct ref_iterator *iter, > > { > > iter->vtable = vtable; > > iter->refname = NULL; > > + iter->referent = NULL; > > iter->oid = NULL; > > iter->flags = 0; > > } > > @@ -199,6 +201,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) > > } > > > > if (selection & ITER_YIELD_CURRENT) { > > + iter->base.referent = (*iter->current)->referent; > > iter->base.refname = (*iter->current)->refname; > > iter->base.oid = (*iter->current)->oid; > > iter->base.flags = (*iter->current)->flags; > > ... other parts of the API seem to follow that philosophy. > > In other words, the invariant of this code is that .referent is > non-NULL if and only if the ref is a symref, and that invariant is > trusted without checking with .flags member. But the earlier hunk > that copied iter0 to base did not seem to be using that invariant, > which made it look inconsistent. Agreed. > > struct ref_entry *create_ref_entry(const char *refname, > > + const char *referent, > > const struct object_id *oid, int flag) > > { > > struct ref_entry *ref; > > @@ -41,6 +43,10 @@ struct ref_entry *create_ref_entry(const char *refname, > > FLEX_ALLOC_STR(ref, name, refname); > > oidcpy(&ref->u.value.oid, oid); > > ref->flag = flag; > > + > > + if (flag & REF_ISSYMREF) > > + ref->u.value.referent = xstrdup_or_null(referent); > > OK. ref_value now has referent next to the existing oid, but that > member only makes sense when flag says it is a symref. 
> > Curiously, that information is missing from ref_value struct, so by > looking at a ref_value alone, we cannot tell if we should trust the > value in the .referent member? > > Makes me wonder if we should follow the same "ignore what the flag > says when filling the .referent member; if the ref is not a symref, > the referent variable is NULL, and if it is, referent is never NULL" > pattern? Then ref->u.value.referent is _always_ defined---the > current code says "the u.value.referent member is undefined for ref > that is not a symref", but with the suggested change, it will be > "the u.value.referent member is NULL for ref that is not a symref, > and for a symref, it is the value of the symref". Yeah, I think that would be preferable indeed. Patrick
Patrick Steinhardt <ps@pks.im> writes: > On Thu, Aug 01, 2024 at 09:41:03AM -0700, Junio C Hamano wrote: >> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes: >> > @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) >> > iter->base.refname = iter->iter0->refname; >> > iter->base.oid = iter->iter0->oid; >> > iter->base.flags = iter->iter0->flags; >> > + if (iter->iter0->flags & REF_ISSYMREF) >> > + iter->base.referent = iter->iter0->referent; >> >> Presumably base.referent is initialized to NULL so this "if" >> statement does not need an else clause? > > This function typically ends up being called in a loop though. So > without the else clause, wouldn't we potentially leak the value of a > preceding ref into subsequent iterations like this? OK, so this does need to clear it when we tell the caller we have a non-symref, as we do want to show NULL as base.referent to the caller in such a case. Thanks. It does reinforce my larger point, which was: >> Makes me wonder if we should follow the same "ignore what the flag >> says when filling the .referent member; if the ref is not a symref, >> the referent variable is NULL, and if it is, referent is never NULL" >> pattern? Then ref->u.value.referent is _always_ defined---the >> current code says "the u.value.referent member is undefined for ref >> that is not a symref", but with the suggested change, it will be >> "the u.value.referent member is NULL for ref that is not a symref, >> and for a symref, it is the value of the symref". > > Yeah, I think that would be preferable indeed. In other words, with the .referent member introduced, checking for (.flags & REF_ISSYMREF) becomes a redundant and duplicated bit of information, as the bit should exactly match the non-NULL-ness of the .referent member. Thanks.
diff --git a/refs/files-backend.c b/refs/files-backend.c index aa52d9be7c7..7640318cba8 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -245,9 +245,11 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs, { struct object_id oid; int flag; - - if (!refs_resolve_ref_unsafe(&refs->base, refname, RESOLVE_REF_READING, - &oid, &flag)) { + const char* referent = refs_resolve_ref_unsafe(&refs->base, + refname, + RESOLVE_REF_READING, + &oid, &flag); + if (!referent) { oidclr(&oid, the_repository->hash_algo); flag |= REF_ISBROKEN; } else if (is_null_oid(&oid)) { @@ -268,7 +270,8 @@ static void loose_fill_ref_dir_regular_file(struct files_ref_store *refs, oidclr(&oid, the_repository->hash_algo); flag |= REF_BAD_NAME | REF_ISBROKEN; } - add_entry_to_dir(dir, create_ref_entry(refname, &oid, flag)); + + add_entry_to_dir(dir, create_ref_entry(refname, referent, &oid, flag)); } /* @@ -886,6 +889,9 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) iter->base.refname = iter->iter0->refname; iter->base.oid = iter->iter0->oid; iter->base.flags = iter->iter0->flags; + if (iter->iter0->flags & REF_ISSYMREF) + iter->base.referent = iter->iter0->referent; + return ITER_OK; } diff --git a/refs/iterator.c b/refs/iterator.c index d355ebf0d59..26acb6bd640 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -7,6 +7,7 @@ #include "refs.h" #include "refs/refs-internal.h" #include "iterator.h" +#include "strbuf.h" int ref_iterator_advance(struct ref_iterator *ref_iterator) { @@ -29,6 +30,7 @@ void base_ref_iterator_init(struct ref_iterator *iter, { iter->vtable = vtable; iter->refname = NULL; + iter->referent = NULL; iter->oid = NULL; iter->flags = 0; } @@ -199,6 +201,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) } if (selection & ITER_YIELD_CURRENT) { + iter->base.referent = (*iter->current)->referent; iter->base.refname = (*iter->current)->refname; iter->base.oid = (*iter->current)->oid; 
iter->base.flags = (*iter->current)->flags; diff --git a/refs/ref-cache.c b/refs/ref-cache.c index 4ce519bbc85..da946d476cc 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -5,6 +5,7 @@ #include "refs-internal.h" #include "ref-cache.h" #include "../iterator.h" +#include "../strbuf.h" void add_entry_to_dir(struct ref_dir *dir, struct ref_entry *entry) { @@ -34,6 +35,7 @@ struct ref_dir *get_ref_dir(struct ref_entry *entry) } struct ref_entry *create_ref_entry(const char *refname, + const char *referent, const struct object_id *oid, int flag) { struct ref_entry *ref; @@ -41,6 +43,10 @@ struct ref_entry *create_ref_entry(const char *refname, FLEX_ALLOC_STR(ref, name, refname); oidcpy(&ref->u.value.oid, oid); ref->flag = flag; + + if (flag & REF_ISSYMREF) + ref->u.value.referent = xstrdup_or_null(referent); + return ref; } @@ -66,6 +72,7 @@ static void free_ref_entry(struct ref_entry *entry) */ clear_ref_dir(&entry->u.subdir); } + free(entry->u.value.referent); free(entry); } @@ -431,6 +438,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level->index = -1; } else { iter->base.refname = entry->name; + iter->base.referent = entry->u.value.referent; iter->base.oid = &entry->u.value.oid; iter->base.flags = entry->flag; return ITER_OK; diff --git a/refs/ref-cache.h b/refs/ref-cache.h index 31ebe24f6cf..5f04e518c37 100644 --- a/refs/ref-cache.h +++ b/refs/ref-cache.h @@ -42,6 +42,7 @@ struct ref_value { * referred to by the last reference in the symlink chain. 
*/ struct object_id oid; + char *referent; }; /* @@ -173,6 +174,7 @@ struct ref_entry *create_dir_entry(struct ref_cache *cache, const char *dirname, size_t len); struct ref_entry *create_ref_entry(const char *refname, + const char *referent, const struct object_id *oid, int flag); /* diff --git a/refs/refs-internal.h b/refs/refs-internal.h index fa975d69aaa..117ec233848 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -299,6 +299,7 @@ enum do_for_each_ref_flags { struct ref_iterator { struct ref_iterator_vtable *vtable; const char *refname; + const char *referent; const struct object_id *oid; unsigned int flags; }; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index fbe74c239d3..9f724c3d632 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -494,8 +494,12 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) the_repository->hash_algo); break; case REFTABLE_REF_SYMREF: - if (!refs_resolve_ref_unsafe(&iter->refs->base, iter->ref.refname, - RESOLVE_REF_READING, &iter->oid, &flags)) + iter->base.referent = refs_resolve_ref_unsafe(&iter->refs->base, + iter->ref.refname, + RESOLVE_REF_READING, + &iter->oid, + &flags); + if (!iter->base.referent) oidclr(&iter->oid, the_repository->hash_algo); break; default: