diff mbox series

[1/8,GSOC] ref-filter: add obj-type check in grab contents

Message ID 48d256db5c349c1fa0615bb60d74039c78a831fd.1623496458.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series cat-file: reuse ref-filter logic | expand

Commit Message

ZheNing Hu June 12, 2021, 11:14 a.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

Only tag and commit objects use `grab_sub_body_contents()` to grab
object contents in the current codebase.  We want to teach the
function to also handle blobs and trees to get their raw data,
without parsing a blob (whose contents looks like a commit or a tag)
incorrectly as a commit or a tag.

Skip the block of code that is specific to handling commits and tags
early when the given object is of a wrong type to help later
addition to handle other types of objects in this function.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 ref-filter.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

Comments

Ævar Arnfjörð Bjarmason June 17, 2021, 7:04 a.m. UTC | #1
On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>
>
> Only tag and commit objects use `grab_sub_body_contents()` to grab
> object contents in the current codebase.  We want to teach the
> function to also handle blobs and trees to get their raw data,
> without parsing a blob (whose contents looks like a commit or a tag)
> incorrectly as a commit or a tag.
>
> Skip the block of code that is specific to handling commits and tags
> early when the given object is of a wrong type to help later
> addition to handle other types of objects in this function.
>
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: Hariom Verma <hariom18599@gmail.com>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---
>  ref-filter.c | 24 +++++++++++++++---------
>  1 file changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/ref-filter.c b/ref-filter.c
> index 4db0e40ff4c6..5cee6512fbaf 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -1356,11 +1356,12 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size
>  }
>  
>  /* See grab_values */
> -static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
> +static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
>  {
>  	int i;
>  	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
>  	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
> +	void *buf = data->content;
>  
>  	for (i = 0; i < used_atom_cnt; i++) {
>  		struct used_atom *atom = &used_atom[i];
> @@ -1371,10 +1372,13 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
>  			continue;
>  		if (deref)
>  			name++;
> -		if (strcmp(name, "body") &&
> -		    !starts_with(name, "subject") &&
> -		    !starts_with(name, "trailers") &&
> -		    !starts_with(name, "contents"))
> +
> +		if ((data->type != OBJ_TAG &&
> +		     data->type != OBJ_COMMIT) ||
> +		    (strcmp(name, "body") &&
> +		     !starts_with(name, "subject") &&
> +		     !starts_with(name, "trailers") &&
> +		     !starts_with(name, "contents")))

We have 4 "real" object types, commit, tree, blob, tag. Do you really
mean "not tag or commit" here, don't you mean "is tree or blob" instead?
I.e. do we really want to pass OBJ_NONE etc. here?

>  			continue;
>  		if (!subpos)
>  			find_subpos(buf,
> @@ -1438,17 +1442,19 @@ static void fill_missing_values(struct atom_value *val)
>   * pointed at by the ref itself; otherwise it is the object the
>   * ref (which is a tag) refers to.
>   */
> -static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
> +static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
>  {
> +	void *buf = data->content;
> +
>  	switch (obj->type) {
>  	case OBJ_TAG:
>  		grab_tag_values(val, deref, obj);
> -		grab_sub_body_contents(val, deref, buf);
> +		grab_sub_body_contents(val, deref, data);
>  		grab_person("tagger", val, deref, buf);
>  		break;
>  	case OBJ_COMMIT:
>  		grab_commit_values(val, deref, obj);
> -		grab_sub_body_contents(val, deref, buf);
> +		grab_sub_body_contents(val, deref, data);
>  		grab_person("author", val, deref, buf);
>  		grab_person("committer", val, deref, buf);
>  		break;
> @@ -1678,7 +1684,7 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
>  			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
>  					       oid_to_hex(&oi->oid), ref->refname);
>  		}
> -		grab_values(ref->value, deref, *obj, oi->content);
> +		grab_values(ref->value, deref, *obj, oi);
>  	}
>  
>  	grab_common_values(ref->value, deref, oi);
Junio C Hamano June 17, 2021, 7:28 a.m. UTC | #2
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>> -		if (strcmp(name, "body") &&
>> -		    !starts_with(name, "subject") &&
>> -		    !starts_with(name, "trailers") &&
>> -		    !starts_with(name, "contents"))
>> +
>> +		if ((data->type != OBJ_TAG &&
>> +		     data->type != OBJ_COMMIT) ||
>> +		    (strcmp(name, "body") &&
>> +		     !starts_with(name, "subject") &&
>> +		     !starts_with(name, "trailers") &&
>> +		     !starts_with(name, "contents")))
>
> We have 4 "real" object types, commit, tree, blob, tag. Do you really
> mean "not tag or commit" here, don't you mean "is tree or blob" instead?
> I.e. do we really want to pass OBJ_NONE etc. here?
>
>>  			continue;

If somebody throws OBJ_NONE at us by mistake, we do not want to
handle such an object and try to extract the subject member from it
anyway, no?

The intent of the code here, before the patch, is that "what we do
after the control flow passes this point is about the body, subject,
trailers, and contents request, so everybody else should go to the
next iteration".  The caller used to give us an object compatible
with these four types of requests, now the caller may throw others,
hence "by the way, we know these four kinds of requests make sense
only for tags and commits, so everybody else should go to the next
iteration, too" would be a natural thing to add.  So in that sense,
I prefer it over "we know these four types of requests do not make
sense for blobs and trees".
diff mbox series

Patch

diff --git a/ref-filter.c b/ref-filter.c
index 4db0e40ff4c6..5cee6512fbaf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1356,11 +1356,12 @@  static void append_lines(struct strbuf *out, const char *buf, unsigned long size
 }
 
 /* See grab_values */
-static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
+static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
 {
 	int i;
 	const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
 	size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
+	void *buf = data->content;
 
 	for (i = 0; i < used_atom_cnt; i++) {
 		struct used_atom *atom = &used_atom[i];
@@ -1371,10 +1372,13 @@  static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			continue;
 		if (deref)
 			name++;
-		if (strcmp(name, "body") &&
-		    !starts_with(name, "subject") &&
-		    !starts_with(name, "trailers") &&
-		    !starts_with(name, "contents"))
+
+		if ((data->type != OBJ_TAG &&
+		     data->type != OBJ_COMMIT) ||
+		    (strcmp(name, "body") &&
+		     !starts_with(name, "subject") &&
+		     !starts_with(name, "trailers") &&
+		     !starts_with(name, "contents")))
 			continue;
 		if (!subpos)
 			find_subpos(buf,
@@ -1438,17 +1442,19 @@  static void fill_missing_values(struct atom_value *val)
  * pointed at by the ref itself; otherwise it is the object the
  * ref (which is a tag) refers to.
  */
-static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf)
+static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
 {
+	void *buf = data->content;
+
 	switch (obj->type) {
 	case OBJ_TAG:
 		grab_tag_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("tagger", val, deref, buf);
 		break;
 	case OBJ_COMMIT:
 		grab_commit_values(val, deref, obj);
-		grab_sub_body_contents(val, deref, buf);
+		grab_sub_body_contents(val, deref, data);
 		grab_person("author", val, deref, buf);
 		grab_person("committer", val, deref, buf);
 		break;
@@ -1678,7 +1684,7 @@  static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi->content);
+		grab_values(ref->value, deref, *obj, oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);