diff mbox series

[v2,3/3] ref-filter: populate symref from iterator

Message ID 3e147e7d850773f44b48d1b86e89aef1415a0ccd.1722524334.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series keep track of unresolved value of symbolic-ref in ref iterators | expand

Commit Message

John Cai Aug. 1, 2024, 2:58 p.m. UTC
From: John Cai <johncai86@gmail.com>

With a previous commit, the reference the symbolic ref points to is saved
in the ref iterator records. Instead of making a separate call to
resolve_refdup() each time, we can just populate the ref_array_item with
the value from the iterator.

Signed-off-by: John Cai <johncai86@gmail.com>
---
 ref-filter.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

Comments

Junio C Hamano Aug. 1, 2024, 4:43 p.m. UTC | #1
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -2852,6 +2852,8 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const struct
>  	ref->commit = commit;
>  	ref->flag = flag;
>  	ref->kind = kind;
> +	if (flag & REF_ISSYMREF)
> +		ref->symref = xstrdup_or_null(referent);

The same reaction as [1/3].  Doesn't the null-ness of referent
convey the same information as the ISSYMREF bit in flag?  IOW,
can't we do this assignment unconditionally?
Junio C Hamano Aug. 1, 2024, 4:51 p.m. UTC | #2
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: John Cai <johncai86@gmail.com>
>
> With a previous commit, the reference the symbolic ref points to is saved
> in the ref iterator records. Instead of making a separate call to
> resolve_refdup() each time, we can just populate the ref_array_item with
> the value from the iterator.
>
> Signed-off-by: John Cai <johncai86@gmail.com>
> ---
>  ref-filter.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/ref-filter.c b/ref-filter.c
> index 39997890feb..08997e59662 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -2783,7 +2783,7 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname)
>  	return ref_kind_from_refname(refname);
>  }
>  
> -static struct ref_array_item *apply_ref_filter(const char *refname, const struct object_id *oid,
> +static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid,
>  			    int flag, struct ref_filter *filter)
>  {
>  	struct ref_array_item *ref;
> @@ -2852,6 +2852,8 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const struct
>  	ref->commit = commit;
>  	ref->flag = flag;
>  	ref->kind = kind;
> +	if (flag & REF_ISSYMREF)
> +		ref->symref = xstrdup_or_null(referent);
>  
>  	return ref;
>  }

What is curious is that we do not lose any code from
populate_value() with this change.

Is that because of this piece of code near the beginning of it?

	CALLOC_ARRAY(ref->value, used_atom_cnt);

	if (need_symref && (ref->flag & REF_ISSYMREF) && !ref->symref) {
		ref->symref = refs_resolve_refdup(get_main_ref_store(the_repository),
						  ref->refname,
						  RESOLVE_REF_READING,
						  NULL, NULL);
		if (!ref->symref)
			ref->symref = xstrdup("");
	}

That is, if we somehow know the value of ref->symref for a ref that
is known to be a symbolic ref (and when we know we need symref
information in the output), we do not bother calling refs_resolve
here to obtain the value.

Thanks.
Junio C Hamano Aug. 1, 2024, 4:54 p.m. UTC | #3
Junio C Hamano <gitster@pobox.com> writes:

>> @@ -2852,6 +2852,8 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const struct
>>  	ref->commit = commit;
>>  	ref->flag = flag;
>>  	ref->kind = kind;
>> +	if (flag & REF_ISSYMREF)
>> +		ref->symref = xstrdup_or_null(referent);
>>  
>>  	return ref;
>>  }
>
> What is curious is that we do not lose any code from
> populate_value() with this change.
>
> Is that because of this piece of code near the beginning of it?
>
> 	CALLOC_ARRAY(ref->value, used_atom_cnt);
>
> 	if (need_symref && (ref->flag & REF_ISSYMREF) && !ref->symref) {
> 		ref->symref = refs_resolve_refdup(get_main_ref_store(the_repository),
> 						  ref->refname,
> 						  RESOLVE_REF_READING,
> 						  NULL, NULL);
> 		if (!ref->symref)
> 			ref->symref = xstrdup("");
> 	}
>
> That is, if we somehow know the value of ref->symref for a ref that
> is known to be a symbolic ref (and when we know we need symref
> information in the output), we do not bother calling refs_resolve
> here to obtain the value.

I forgot to ask the real question.  With your change in place, does
this "lazily fill ref->symref if it hasn't been discovered yet" code
still trigger?  Under what condition?  Or is this now a dead code?

Thanks.
John Cai Aug. 6, 2024, 7:49 p.m. UTC | #4
Hi Junio,

On 1 Aug 2024, at 12:54, Junio C Hamano wrote:

> Junio C Hamano <gitster@pobox.com> writes:
>
>>> @@ -2852,6 +2852,8 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const struct
>>>  	ref->commit = commit;
>>>  	ref->flag = flag;
>>>  	ref->kind = kind;
>>> +	if (flag & REF_ISSYMREF)
>>> +		ref->symref = xstrdup_or_null(referent);
>>>
>>>  	return ref;
>>>  }
>>
>> What is curious is that we do not lose any code from
>> populate_value() with this change.
>>
>> Is that because of this piece of code near the beginning of it?
>>
>> 	CALLOC_ARRAY(ref->value, used_atom_cnt);
>>
>> 	if (need_symref && (ref->flag & REF_ISSYMREF) && !ref->symref) {
>> 		ref->symref = refs_resolve_refdup(get_main_ref_store(the_repository),
>> 						  ref->refname,
>> 						  RESOLVE_REF_READING,
>> 						  NULL, NULL);
>> 		if (!ref->symref)
>> 			ref->symref = xstrdup("");
>> 	}
>>
>> That is, if we somehow know the value of ref->symref for a ref that
>> is known to be a symbolic ref (and when we know we need symref
>> information in the output), we do not bother calling refs_resolve
>> here to obtain the value.
>
> I forgot to ask the real question.  With your change in place, does
> this "lazily fill ref->symref if it hasn't been discovered yet" code
> still trigger?  Under what condition?  Or is this now a dead code?

Yes that's a good question. I took a look and it seems like in *most* cases by
the time populate_value() is called, apply_ref_filter() has already been called that
populates the symref member of ref_array_item.

populate_value() gets called by get_ref_atom_value() which gets called by both

1. format_ref_array_item()
2. cmp_ref_sorting()

In the case of [2], the callchain starts with filter_and_format_refs() which
calls ref_array_sort() that eventually calls populate_value(). Before
ref_array_sort() is called, filter_refs() is called which ends up calling
do_filter_refs() with filter_one(), leading to apply_ref_filter().

In the case of [1] however, there are a couple of code paths that call
populate_value() without apply_ref_filter() ever being called.

pretty_print_ref() directly calls format_ref_array_item() ->
get_ref_atom_value() -> populate_value(). However, apply_ref_filter() is not
called which means the symref will not be populated.

Looking through the codebase, this function is only called in builtin/tag.c and
bulitin/verify-tag.c in the `git tag -v` codepath. So it seems that if we got
rid of this block of code in populate_value(), only in the case where `git
tag -v --format='%(symref)'` on a symbolic ref pointing to a tag would the
symref be missing.

But I don't even know if this is possible. I tried this locally and got the
error:


$ git tag -a -s -m "version 1" v1 refs/heads/master
$ git symbolic-ref refs/tags/symbolic-v1 refs/tags/v1
$ git tag -v --format='(%symref)'

error: tag 'refs/tags/symbolic-v1' not found.


So practically speaking, I think we are safe to remove. However, from a
future-proof point of view, anyone in the future who calls pretty_print_ref() would also need to be sure to populate the symref member in ref_array_item.

So perhaps from a code durability standpoint we should keep that block?

>
> Thanks.
Junio C Hamano Aug. 6, 2024, 8:17 p.m. UTC | #5
John Cai <johncai86@gmail.com> writes:

> Yes that's a good question. I took a look and it seems like in *most* cases by
> the time populate_value() is called, apply_ref_filter() has already been called that
> populates the symref member of ref_array_item.
>
> populate_value() gets called by get_ref_atom_value() which gets called by both
>
> 1. format_ref_array_item()
> 2. cmp_ref_sorting()
>
> In the case of [2], the callchain starts with filter_and_format_refs() which
> calls ref_array_sort() that eventually calls populate_value(). Before
> ref_array_sort() is called, filter_refs() is called which ends up calling
> do_filter_refs() with filter_one(), leading to apply_ref_filter().
>
> In the case of [1] however, there are a couple of code paths that call
> populate_value() without apply_ref_filter() ever being called.
>
> pretty_print_ref() directly calls format_ref_array_item() ->
> get_ref_atom_value() -> populate_value(). However, apply_ref_filter() is not
> called which means the symref will not be populated.

I am perfectly OK to keep the "fallback" code, even if you haven't
found a concrete reproduction to trigger it.  Removing it is not
part of what this series is trying to do, anyway.  The theme of the
topic has always been "optimize when we can", at least to me.

Having said that, if we somehow can give the analysis you did above
as hint to future developers, then they can start from there when
they consider if they can remove the "fallback" code.  It would help
even it just said

    /*
     * NEEDSWORK: this might have become a dead code after
     * optimization to grab symref target in apply_ref_filter().
     */

and nothing else.
diff mbox series

Patch

diff --git a/ref-filter.c b/ref-filter.c
index 39997890feb..08997e59662 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2783,7 +2783,7 @@  static int filter_ref_kind(struct ref_filter *filter, const char *refname)
 	return ref_kind_from_refname(refname);
 }
 
-static struct ref_array_item *apply_ref_filter(const char *refname, const struct object_id *oid,
+static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid,
 			    int flag, struct ref_filter *filter)
 {
 	struct ref_array_item *ref;
@@ -2852,6 +2852,8 @@  static struct ref_array_item *apply_ref_filter(const char *refname, const struct
 	ref->commit = commit;
 	ref->flag = flag;
 	ref->kind = kind;
+	if (flag & REF_ISSYMREF)
+		ref->symref = xstrdup_or_null(referent);
 
 	return ref;
 }
@@ -2865,12 +2867,12 @@  struct ref_filter_cbdata {
  * A call-back given to for_each_ref().  Filter refs and keep them for
  * later object processing.
  */
-static int filter_one(const char *refname, const char *referent UNUSED, const struct object_id *oid, int flag, void *cb_data)
+static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data)
 {
 	struct ref_filter_cbdata *ref_cbdata = cb_data;
 	struct ref_array_item *ref;
 
-	ref = apply_ref_filter(refname, oid, flag, ref_cbdata->filter);
+	ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter);
 	if (ref)
 		ref_array_append(ref_cbdata->array, ref);
 
@@ -2900,13 +2902,13 @@  struct ref_filter_and_format_cbdata {
 	} internal;
 };
 
-static int filter_and_format_one(const char *refname, const char *referent UNUSED, const struct object_id *oid, int flag, void *cb_data)
+static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data)
 {
 	struct ref_filter_and_format_cbdata *ref_cbdata = cb_data;
 	struct ref_array_item *ref;
 	struct strbuf output = STRBUF_INIT, err = STRBUF_INIT;
 
-	ref = apply_ref_filter(refname, oid, flag, ref_cbdata->filter);
+	ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter);
 	if (!ref)
 		return 0;