diff mbox series

[8/8,GSOC] cat-file: re-implement --textconv, --filters options

Message ID 0004d5b24a0fb735d7fa9cb9a8e214d6e838baeb.1623496458.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series cat-file: reuse ref-filter logic | expand

Commit Message

ZheNing Hu June 12, 2021, 11:14 a.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

After cat-file reuses the ref-filter logic, we re-implement the
functions of --textconv and --filters options.

Add members `use_textconv` and `use_filters` in struct `ref_format`,
and use global variables `use_filters` and `use_textconv` in
`ref-filter.c`, so that we can filter the content of the object
in get_object(). Use `actual_oi` to record the real expand_data:
it may point to the original `oi` or the `act_oi` processed by
`textconv_object()` or `convert_to_working_tree()`. `grab_values()`
will grab the contents of `actual_oi` and `grab_common_values()`
to grab the contents of origin `oi`, this ensures that `%(objectsize)`
still uses the size of the unfiltered data.

In `get_object()`, we made an optimization: Firstly, get the size and
type of the object instead of directly getting the object data.
If using --textconv, after successfully obtaining the filtered object
data, an extra oid_object_info_extended() will be skipped, which can
reduce the cost of object data copy; If using --filter, the data of
the object first will be getted first, and then convert_to_working_tree()
will be used to get the filtered object data.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 builtin/cat-file.c |  5 ++++
 ref-filter.c       | 66 ++++++++++++++++++++++++++++++++++++++++++++--
 ref-filter.h       |  5 ++--
 3 files changed, 72 insertions(+), 4 deletions(-)

Comments

Ævar Arnfjörð Bjarmason June 17, 2021, 7:18 a.m. UTC | #1
On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>

>  	opt->format.format = format.buf;
> +	if (opt->cmdmode == 'c')
> +		opt->format.use_textconv = 1;
> +	if (opt->cmdmode == 'w')
> +		opt->format.use_filters = 1;
> +


Style nit: if/if -> if/else if, both can't be true.

> +		/* get the type and size */
> +		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> +					OBJECT_INFO_LOOKUP_REPLACE))
> +			return strbuf_addf_ret(err, 1, _("%s missing"),
> +					       oid_to_hex(&oi->oid));
> +
> +		oi->info.sizep = NULL;
> +		oi->info.typep = NULL;
> +		oi->info.contentp = temp_contentp;

Here we have a call function and error if bad, then proceed to populate
stuff (good)...

> +
> +		if (use_textconv) {
> +			act_oi = *oi;
> +
> +			if(!ref->rest)
> +				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> +						       oid_to_hex(&act_oi.oid));
> +			if (act_oi.type == OBJ_BLOB) {
> +				if (textconv_object(the_repository,
> +						    ref->rest, 0100644, &act_oi.oid,
> +						    1, (char **)(&act_oi.content), &act_oi.size)) {
> +					actual_oi = &act_oi;
> +					goto success;
> +				}
> +			}
> +		}


Maybe change the if (A) { if (B) {} ) to if (A && B) {} ?

>  	}
>  	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
>  				     OBJECT_INFO_LOOKUP_REPLACE))
> @@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
>  		BUG("Object size is less than zero.");
>  
>  	if (oi->info.contentp) {
> -		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
> +		if (use_filters) {
> +			if(!ref->rest)
> +				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> +						       oid_to_hex(&oi->oid));
> +			if (oi->type == OBJ_BLOB) {
> +				struct strbuf strbuf = STRBUF_INIT;
> +				struct checkout_metadata meta;
> +				act_oi = *oi;
> +
> +				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
> +				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
> +					act_oi.size = strbuf.len;
> +					act_oi.content = strbuf_detach(&strbuf, NULL);
> +					actual_oi = &act_oi;
> +				} else {
> +					die("could not convert '%s' %s",
> +					    oid_to_hex(&oi->oid), ref->rest);
> +				}

... but here instead of "if (!x) { bad } do stuff" we have if (x) {do
stuff} else { bad }". Better to get the die out of the way, and avoid
the indentation on the "do stuff" IMO.


> -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
> +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }

Not a new problem, but an earlier cleanup to simply change this to
designated initializers would be welcome, see recent work of mine in
fsck.h for an example.

I.e. we keep churning on changing this *_INIT just to populate the one
-1 field at the end, can also be simply:

    #define FOO_INIT { .that_field = 1 }
ZheNing Hu June 17, 2021, 9:53 a.m. UTC | #2
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月17日周四 下午3:22写道:
>
>
> On Sat, Jun 12 2021, ZheNing Hu via GitGitGadget wrote:
>
> > From: ZheNing Hu <adlternative@gmail.com>
>
> >       opt->format.format = format.buf;
> > +     if (opt->cmdmode == 'c')
> > +             opt->format.use_textconv = 1;
> > +     if (opt->cmdmode == 'w')
> > +             opt->format.use_filters = 1;
> > +
>
>
> Style nit: if/if -> if/else if, both can't be true.
>

Ok.

> > +             /* get the type and size */
> > +             if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> > +                                     OBJECT_INFO_LOOKUP_REPLACE))
> > +                     return strbuf_addf_ret(err, 1, _("%s missing"),
> > +                                            oid_to_hex(&oi->oid));
> > +
> > +             oi->info.sizep = NULL;
> > +             oi->info.typep = NULL;
> > +             oi->info.contentp = temp_contentp;
>
> Here we have a call function and error if bad, then proceed to populate
> stuff (good)...
>
> > +
> > +             if (use_textconv) {
> > +                     act_oi = *oi;
> > +
> > +                     if(!ref->rest)
> > +                             return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> > +                                                    oid_to_hex(&act_oi.oid));
> > +                     if (act_oi.type == OBJ_BLOB) {
> > +                             if (textconv_object(the_repository,
> > +                                                 ref->rest, 0100644, &act_oi.oid,
> > +                                                 1, (char **)(&act_oi.content), &act_oi.size)) {
> > +                                     actual_oi = &act_oi;
> > +                                     goto success;
> > +                             }
> > +                     }
> > +             }
>
>
> Maybe change the if (A) { if (B) {} ) to if (A && B) {} ?
>

Maybe it's something like this:

@@ -1762,19 +1762,17 @@ static int get_object(struct ref_array_item
*ref, int deref, struct object **obj
                oi->info.typep = NULL;
                oi->info.contentp = temp_contentp;

-               if (use_textconv) {
-                       act_oi = *oi;
+               if (use_textconv && !ref->rest)
+                       return strbuf_addf_ret(err, -1, _("missing
path for '%s'"),
+                                              oid_to_hex(&act_oi.oid));

-                       if(!ref->rest)
-                               return strbuf_addf_ret(err, -1,
_("missing path for '%s'"),
-                                                      oid_to_hex(&act_oi.oid));
-                       if (act_oi.type == OBJ_BLOB) {
-                               if (textconv_object(the_repository,
-                                                   ref->rest,
0100644, &act_oi.oid,
-                                                   1, (char
**)(&act_oi.content), &act_oi.size)) {
-                                       actual_oi = &act_oi;
-                                       goto success;
-                               }
+               if (use_textconv && oi->type == OBJ_BLOB) {
+                       act_oi = *oi;
+                       if (textconv_object(the_repository,
+                                           ref->rest, 0100644, &act_oi.oid,
+                                           1, (char
**)(&act_oi.content), &act_oi.size)) {
+                               actual_oi = &act_oi;
+                               goto success;
                        }
                }
        }

> >       }
> >       if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
> >                                    OBJECT_INFO_LOOKUP_REPLACE))
> > @@ -1748,19 +1786,43 @@ static int get_object(struct ref_array_item *ref, int deref, struct object **obj
> >               BUG("Object size is less than zero.");
> >
> >       if (oi->info.contentp) {
> > -             *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
> > +             if (use_filters) {
> > +                     if(!ref->rest)
> > +                             return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
> > +                                                    oid_to_hex(&oi->oid));
> > +                     if (oi->type == OBJ_BLOB) {
> > +                             struct strbuf strbuf = STRBUF_INIT;
> > +                             struct checkout_metadata meta;
> > +                             act_oi = *oi;
> > +
> > +                             init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
> > +                             if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
> > +                                     act_oi.size = strbuf.len;
> > +                                     act_oi.content = strbuf_detach(&strbuf, NULL);
> > +                                     actual_oi = &act_oi;
> > +                             } else {
> > +                                     die("could not convert '%s' %s",
> > +                                         oid_to_hex(&oi->oid), ref->rest);
> > +                             }
>
> ... but here instead of "if (!x) { bad } do stuff" we have if (x) {do
> stuff} else { bad }". Better to get the die out of the way, and avoid
> the indentation on the "do stuff" IMO.
>

And this part:

@@ -1786,25 +1784,21 @@ static int get_object(struct ref_array_item
*ref, int deref, struct object **obj
                BUG("Object size is less than zero.");

        if (oi->info.contentp) {
-               if (use_filters) {
-                       if(!ref->rest)
-                               return strbuf_addf_ret(err, -1,
_("missing path for '%s'"),
-                                                      oid_to_hex(&oi->oid));
-                       if (oi->type == OBJ_BLOB) {
-                               struct strbuf strbuf = STRBUF_INIT;
-                               struct checkout_metadata meta;
-                               act_oi = *oi;
-
-                               init_checkout_metadata(&meta, NULL,
NULL, &act_oi.oid);
-                               if
(convert_to_working_tree(&the_index, ref->rest, act_oi.content,
act_oi.size, &strbuf, &meta)) {
-                                       act_oi.size = strbuf.len;
-                                       act_oi.content =
strbuf_detach(&strbuf, NULL);
-                                       actual_oi = &act_oi;
-                               } else {
-                                       die("could not convert '%s' %s",
-                                           oid_to_hex(&oi->oid), ref->rest);
-                               }
-                       }
+               if (use_filters && !ref->rest)
+                       return strbuf_addf_ret(err, -1, _("missing
path for '%s'"),
+                                               oid_to_hex(&oi->oid));
+               if (use_filters && oi->type == OBJ_BLOB) {
+                       struct strbuf strbuf = STRBUF_INIT;
+                       struct checkout_metadata meta;
+                       act_oi = *oi;
+
+                       init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+                       if (!convert_to_working_tree(&the_index,
ref->rest, act_oi.content, act_oi.size, &strbuf, &meta))
+                               die("could not convert '%s' %s",
+                                       oid_to_hex(&oi->oid), ref->rest);
+                       act_oi.size = strbuf.len;
+                       act_oi.content = strbuf_detach(&strbuf, NULL);
+                       actual_oi = &act_oi;
                }


>
> > -#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
> > +#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
>
> Not a new problem, but an earlier cleanup to simply change this to
> designated initializers would be welcome, see recent work of mine in
> fsck.h for an example.
>
> I.e. we keep churning on changing this *_INIT just to populate the one
> -1 field at the end, can also be simply:
>
>     #define FOO_INIT { .that_field = 1 }
>

I agree, this method is better. We don't need to care about which members
so many "0" point to.

Thanks.
--
ZheNing Hu
diff mbox series

Patch

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 1a73c3d23dde..3fde2587201b 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -376,6 +376,11 @@  static int batch_objects(struct batch_options *opt, const struct option *options
 	if (opt->print_contents)
 		strbuf_addstr(&format, "\n%(raw)");
 	opt->format.format = format.buf;
+	if (opt->cmdmode == 'c')
+		opt->format.use_textconv = 1;
+	if (opt->cmdmode == 'w')
+		opt->format.use_filters = 1;
+
 	if (verify_ref_format(&opt->format))
 		usage_with_options(cat_file_usage, options);
 
diff --git a/ref-filter.c b/ref-filter.c
index d4c88d496698..8264ef7d2786 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1,3 +1,4 @@ 
+#define USE_THE_INDEX_COMPATIBILITY_MACROS
 #include "builtin.h"
 #include "cache.h"
 #include "parse-options.h"
@@ -84,6 +85,9 @@  static struct expand_data {
 	struct object_info info;
 } oi, oi_deref;
 
+int use_filters;
+int use_textconv;
+
 struct ref_to_worktree_entry {
 	struct hashmap_entry ent;
 	struct worktree *wt; /* key is wt->head_ref */
@@ -1027,6 +1031,9 @@  int verify_ref_format(struct ref_format *format)
 					       used_atom[at].atom_type == ATOM_WORKTREEPATH)))
 			die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
 
+		use_filters = format->use_filters;
+		use_textconv = format->use_textconv;
+
 		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
 		    used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
@@ -1735,10 +1742,41 @@  static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 {
 	/* parse_object_buffer() will set eaten to 0 if free() will be needed */
 	int eaten = 1;
+	struct expand_data *actual_oi = oi;
+	struct expand_data act_oi = {0};
+
 	if (oi->info.contentp) {
 		/* We need to know that to use parse_object_buffer properly */
+		void **temp_contentp = oi->info.contentp;
+		oi->info.contentp = NULL;
 		oi->info.sizep = &oi->size;
 		oi->info.typep = &oi->type;
+
+		/* get the type and size */
+		if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
+					OBJECT_INFO_LOOKUP_REPLACE))
+			return strbuf_addf_ret(err, 1, _("%s missing"),
+					       oid_to_hex(&oi->oid));
+
+		oi->info.sizep = NULL;
+		oi->info.typep = NULL;
+		oi->info.contentp = temp_contentp;
+
+		if (use_textconv) {
+			act_oi = *oi;
+
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&act_oi.oid));
+			if (act_oi.type == OBJ_BLOB) {
+				if (textconv_object(the_repository,
+						    ref->rest, 0100644, &act_oi.oid,
+						    1, (char **)(&act_oi.content), &act_oi.size)) {
+					actual_oi = &act_oi;
+					goto success;
+				}
+			}
+		}
 	}
 	if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
 				     OBJECT_INFO_LOOKUP_REPLACE))
@@ -1748,19 +1786,43 @@  static int get_object(struct ref_array_item *ref, int deref, struct object **obj
 		BUG("Object size is less than zero.");
 
 	if (oi->info.contentp) {
-		*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
+		if (use_filters) {
+			if(!ref->rest)
+				return strbuf_addf_ret(err, -1, _("missing path for '%s'"),
+						       oid_to_hex(&oi->oid));
+			if (oi->type == OBJ_BLOB) {
+				struct strbuf strbuf = STRBUF_INIT;
+				struct checkout_metadata meta;
+				act_oi = *oi;
+
+				init_checkout_metadata(&meta, NULL, NULL, &act_oi.oid);
+				if (convert_to_working_tree(&the_index, ref->rest, act_oi.content, act_oi.size, &strbuf, &meta)) {
+					act_oi.size = strbuf.len;
+					act_oi.content = strbuf_detach(&strbuf, NULL);
+					actual_oi = &act_oi;
+				} else {
+					die("could not convert '%s' %s",
+					    oid_to_hex(&oi->oid), ref->rest);
+				}
+			}
+		}
+
+success:
+		*obj = parse_object_buffer(the_repository, &actual_oi->oid, actual_oi->type, actual_oi->size, actual_oi->content, &eaten);
 		if (!*obj) {
 			if (!eaten)
 				free(oi->content);
 			return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
 					       oid_to_hex(&oi->oid), ref->refname);
 		}
-		grab_values(ref->value, deref, *obj, oi);
+		grab_values(ref->value, deref, *obj, actual_oi);
 	}
 
 	grab_common_values(ref->value, deref, oi);
 	if (!eaten)
 		free(oi->content);
+	if (actual_oi != oi)
+		free(actual_oi->content);
 	return 0;
 }
 
diff --git a/ref-filter.h b/ref-filter.h
index bece9583cf18..cf7bad4e8b49 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -80,14 +80,15 @@  struct ref_format {
 	const char *rest;
 	int cat_file_mode;
 	int quote_style;
+	int use_textconv;
+	int use_filters;
 	int use_rest;
 	int use_color;
-
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, 0, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \