diff mbox series

[GSoC,v11,02/10] fsck: add a unified interface for reporting fsck messages

Message ID ZpPFElAYLVtRlq-e@ArchLinux (mailing list archive)
State Superseded
Headers show
Series ref consistency check infra setup | expand

Commit Message

shejialuo July 14, 2024, 12:31 p.m. UTC
The static function "report" provided by "fsck.c" checks the fsck
error type and calls the callback "error_func" to report the message.
However, the "report" function is tied to the object database and
cannot be reused for refs. In order to provide a unified interface which
can report on either objects or refs, create a new function
"fsck_vreport" following the "report" prototype. Instead of using "...",
take a "va_list" to allow more flexibility.

When checking loose refs and reflogs, we only need to pass the checked
name to the fsck error report function. However, for packed-refs and
reftable refs, we need to check both the consistency of the file itself
and the refs or reflogs contained in the file. To support the above
checks, add two parameters, "ref_checkee" and "sub_ref_checkee", to the
"fsck_vreport" function.

Like "report", the "fsck_vreport" function will use the "error_func"
registered in "fsck_options" to report customized messages. Change the
"error_func" prototype to align with the new "fsck_vreport".

Then, change the "report" function to use "fsck_vreport" to report
object-related messages. Add a new function called "fsck_refs_report"
that uses "fsck_vreport" to report ref-related messages.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
 builtin/fsck.c  |  2 ++
 builtin/mktag.c |  2 ++
 fsck.c          | 60 ++++++++++++++++++++++++++++++++++++++++++-------
 fsck.h          | 19 +++++++++++++++-
 object-file.c   | 12 +++++-----
 5 files changed, 81 insertions(+), 14 deletions(-)

Comments

Karthik Nayak July 18, 2024, 1:26 p.m. UTC | #1
shejialuo <shejialuo@gmail.com> writes:

> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs. In order to provide a unified interface which
> can report either objects or refs, create a new function "fsck_vreport"
> following the "report" prototype. Instead of using "...", provide
> "va_list" to allow more flexibility.
>
> When checking loose refs and reflogs, we only need to pass the checked
> name to the fsck error report function. However, for packed-refs and
> reftable refs, we need to check both the consistency of the file itself
> and the refs or reflogs contained in the file. In order to provide above
> checks, add two parameters "ref_checkee" and "sub_ref_checkee" in
> "fsck_vreport" function.

Nit: It would be nice if you described here the expected usage of
"ref_checkee" and "sub_ref_checkee".

[snip]

> diff --git a/fsck.h b/fsck.h
> index bcfb2e34cd..61ca38afd6 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -114,19 +114,25 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
>  typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
>  			      void *data, struct fsck_options *options);
>
> -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> +/*
> + * callback function for reporting errors when checking either objects or refs
> + */
>  typedef int (*fsck_error)(struct fsck_options *o,
>  			  const struct object_id *oid, enum object_type object_type,
> +			  const char *ref_checkee, const char *sub_ref_checkee,

This makes me really wonder whether this is the best way to do this. It
seems to solve the current situation, but what happens if you also want
to add the reftable size or packed-refs size here? Would you introduce
another field?

Would it be better to add a single `const struct fsck_refs_info *`
instead?

Perhaps something like:

struct fsck_refs_info {
       char *refname;
       union {
             struct {
                    ...
             } reftable;
             struct {
                    ...
             } files;
       } u;
}

Of course we can fill in the details as we need them.

>  			  enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
>  			  const char *message);
>
>  int fsck_error_function(struct fsck_options *o,
>  			const struct object_id *oid, enum object_type object_type,
> +			const char *ref_checkee, const char *sub_ref_checkee,
>  			enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
>  			const char *message);
>  int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
>  					   const struct object_id *oid,
>  					   enum object_type object_type,
> +					   const char *ref_checkee,
> +					   const char *sub_ref_checkee,
>  					   enum fsck_msg_type msg_type,
>  					   enum fsck_msg_id msg_id,
>  					   const char *message);
> @@ -209,6 +215,17 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
>   */
>  int fsck_finish(struct fsck_options *options);
>
> +/*
> + * Report an error or warning for refs.
> + */
> +__attribute__((format (printf, 6, 7)))
> +int fsck_refs_report(struct fsck_options *options,
> +		     const struct object_id *oid,
> +		     const char *ref_checkee,
> +		     const char *sub_ref_checkee,
> +		     enum fsck_msg_id msg_id,
> +		     const char *fmt, ...);
> +
>  /*
>   * Subsystem for storing human-readable names for each object.
>   *
> diff --git a/object-file.c b/object-file.c
> index 065103be3e..bc63b80c48 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2470,11 +2470,13 @@ int repo_has_object_file(struct repository *r,
>   * give more context.
>   */
>  static int hash_format_check_report(struct fsck_options *opts UNUSED,
> -				     const struct object_id *oid UNUSED,
> -				     enum object_type object_type UNUSED,
> -				     enum fsck_msg_type msg_type UNUSED,
> -				     enum fsck_msg_id msg_id UNUSED,
> -				     const char *message)
> +				    const struct object_id *oid UNUSED,
> +				    enum object_type object_type UNUSED,
> +				    const char *ref_checkee UNUSED,
> +				    const char *sub_ref_checkee UNUSED,
> +				    enum fsck_msg_type msg_type UNUSED,
> +				    enum fsck_msg_id msg_id UNUSED,
> +				    const char *message)
>  {
>  	error(_("object fails fsck: %s"), message);
>  	return 1;
> --
> 2.45.2
shejialuo July 20, 2024, 7:24 a.m. UTC | #2
On Thu, Jul 18, 2024 at 06:26:30AM -0700, Karthik Nayak wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > diff --git a/fsck.h b/fsck.h
> > index bcfb2e34cd..61ca38afd6 100644
> > --- a/fsck.h
> > +++ b/fsck.h
> > @@ -114,19 +114,25 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> >  typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> >  			      void *data, struct fsck_options *options);
> >
> > -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> > +/*
> > + * callback function for reporting errors when checking either objects or refs
> > + */
> >  typedef int (*fsck_error)(struct fsck_options *o,
> >  			  const struct object_id *oid, enum object_type object_type,
> > +			  const char *ref_checkee, const char *sub_ref_checkee,
> 
> This makes me really wonder if this is the best way we can do this? This
> seems to solve for the current situation, but what happens if you want
> to also adding the reftable size or packed-refs size here? Would you
> introduce another field?
> 
> would it be better to add a single `const struct *fsck_refs_info`
> instead?
> 
> Perhaps something like:
> 
> struct fsck_refs_info {
>        char *refname;
>        union {
>              struct {
>                     ...
>              } reftable;
>              struct {
>                     ...
>              } files;
>        } u;
> }
> 
> Of course we can fill in the details as we need them.
> 

I agree; we should design an extensible data structure here, because we
don't know yet what we will need later. I will use this idea. However,
I don't think "refname" is a good name, so I have decided to use
"ref_checkee" instead: "refname" may lead callers to think we only check
the refname, whereas we also need to check reflogs.
diff mbox series

Patch

diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..8aeb8b17e2 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -92,6 +92,8 @@  static int objerror(struct object *obj, const char *err)
 static int fsck_error_func(struct fsck_options *o UNUSED,
 			   const struct object_id *oid,
 			   enum object_type object_type,
+			   const char *ref_checkee UNUSED,
+			   const char *sub_ref_checkee UNUSED,
 			   enum fsck_msg_type msg_type,
 			   enum fsck_msg_id msg_id UNUSED,
 			   const char *message)
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..b5f9e108e5 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,8 @@  static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
 static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
 				 const struct object_id *oid UNUSED,
 				 enum object_type object_type UNUSED,
+				 const char *ref_checkee UNUSED,
+				 const char *sub_ref_checkee UNUSED,
 				 enum fsck_msg_type msg_type,
 				 enum fsck_msg_id msg_id UNUSED,
 				 const char *message)
diff --git a/fsck.c b/fsck.c
index 3f32441492..7fceecdfae 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,19 @@  static int object_on_skiplist(struct fsck_options *opts,
 	return opts && oid && oidset_contains(&opts->skip_oids, oid);
 }
 
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
-		  const struct object_id *oid, enum object_type object_type,
-		  enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+			const struct object_id *oid,
+			enum object_type object_type,
+			const char *ref_checkee,
+			const char *sub_ref_checkee,
+			enum fsck_msg_id msg_id, const char *fmt, va_list ap)
 {
-	va_list ap;
+	va_list ap_copy;
 	struct strbuf sb = STRBUF_INIT;
 	enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
 	int result;
@@ -250,9 +257,10 @@  static int report(struct fsck_options *options,
 	prepare_msg_ids();
 	strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
 
-	va_start(ap, fmt);
-	strbuf_vaddf(&sb, fmt, ap);
+	va_copy(ap_copy, ap);
+	strbuf_vaddf(&sb, fmt, ap_copy);
 	result = options->error_func(options, oid, object_type,
+				     ref_checkee, sub_ref_checkee,
 				     msg_type, msg_id, sb.buf);
 	strbuf_release(&sb);
 	va_end(ap);
@@ -260,6 +268,37 @@  static int report(struct fsck_options *options,
 	return result;
 }
 
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+		  const struct object_id *oid, enum object_type object_type,
+		  enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+	va_list ap;
+	int result;
+
+	va_start(ap, fmt);
+	result = fsck_vreport(options, oid, object_type, NULL, NULL,
+			      msg_id, fmt, ap);
+	va_end(ap);
+
+	return result;
+}
+
+int fsck_refs_report(struct fsck_options *options,
+		     const struct object_id *oid,
+		     const char *ref_checkee,
+		     const char *sub_ref_checkee,
+		     enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+	va_list ap;
+	int result;
+	va_start(ap, fmt);
+	result = fsck_vreport(options, oid, OBJ_NONE, ref_checkee, sub_ref_checkee,
+			      msg_id, fmt, ap);
+	va_end(ap);
+	return result;
+}
+
 void fsck_enable_object_names(struct fsck_options *options)
 {
 	if (!options->object_names)
@@ -1203,6 +1242,8 @@  int fsck_buffer(const struct object_id *oid, enum object_type type,
 int fsck_error_function(struct fsck_options *o,
 			const struct object_id *oid,
 			enum object_type object_type UNUSED,
+			const char *ref_checkee UNUSED,
+			const char *sub_ref_checkee UNUSED,
 			enum fsck_msg_type msg_type,
 			enum fsck_msg_id msg_id UNUSED,
 			const char *message)
@@ -1306,6 +1347,8 @@  int git_fsck_config(const char *var, const char *value,
 int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
 					   const struct object_id *oid,
 					   enum object_type object_type,
+					   const char *ref_checkee,
+					   const char *sub_ref_checkee,
 					   enum fsck_msg_type msg_type,
 					   enum fsck_msg_id msg_id,
 					   const char *message)
@@ -1314,5 +1357,6 @@  int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
 		puts(oid_to_hex(oid));
 		return 0;
 	}
-	return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+	return fsck_error_function(o, oid, object_type, ref_checkee,
+				   sub_ref_checkee, msg_type, msg_id, message);
 }
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..61ca38afd6 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,19 +114,25 @@  int is_valid_msg_type(const char *msg_id, const char *msg_type);
 typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
 			      void *data, struct fsck_options *options);
 
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
 typedef int (*fsck_error)(struct fsck_options *o,
 			  const struct object_id *oid, enum object_type object_type,
+			  const char *ref_checkee, const char *sub_ref_checkee,
 			  enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
 			  const char *message);
 
 int fsck_error_function(struct fsck_options *o,
 			const struct object_id *oid, enum object_type object_type,
+			const char *ref_checkee, const char *sub_ref_checkee,
 			enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
 			const char *message);
 int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
 					   const struct object_id *oid,
 					   enum object_type object_type,
+					   const char *ref_checkee,
+					   const char *sub_ref_checkee,
 					   enum fsck_msg_type msg_type,
 					   enum fsck_msg_id msg_id,
 					   const char *message);
@@ -209,6 +215,17 @@  int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
  */
 int fsck_finish(struct fsck_options *options);
 
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 6, 7)))
+int fsck_refs_report(struct fsck_options *options,
+		     const struct object_id *oid,
+		     const char *ref_checkee,
+		     const char *sub_ref_checkee,
+		     enum fsck_msg_id msg_id,
+		     const char *fmt, ...);
+
 /*
  * Subsystem for storing human-readable names for each object.
  *
diff --git a/object-file.c b/object-file.c
index 065103be3e..bc63b80c48 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,13 @@  int repo_has_object_file(struct repository *r,
  * give more context.
  */
 static int hash_format_check_report(struct fsck_options *opts UNUSED,
-				     const struct object_id *oid UNUSED,
-				     enum object_type object_type UNUSED,
-				     enum fsck_msg_type msg_type UNUSED,
-				     enum fsck_msg_id msg_id UNUSED,
-				     const char *message)
+				    const struct object_id *oid UNUSED,
+				    enum object_type object_type UNUSED,
+				    const char *ref_checkee UNUSED,
+				    const char *sub_ref_checkee UNUSED,
+				    enum fsck_msg_type msg_type UNUSED,
+				    enum fsck_msg_id msg_id UNUSED,
+				    const char *message)
 {
 	error(_("object fails fsck: %s"), message);
 	return 1;