diff mbox series

[GSoC,v9,3/9] fsck: add refs-related options and error report function

Message ID Zo0uiz1y6hJld2Rv@ArchLinux (mailing list archive)
State Superseded
Headers show
Series ref consistency check infra setup | expand

Commit Message

shejialuo July 9, 2024, 12:35 p.m. UTC
Add refs-related options to the "fsck_options", create refs-specific
"error_func" callback "fsck_refs_error_function".

"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.

Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
 fsck.c | 23 +++++++++++++++++++++++
 fsck.h | 15 +++++++++++++++
 2 files changed, 38 insertions(+)

Comments

Justin Tobler July 9, 2024, 9:29 p.m. UTC | #1
On 24/07/09 08:35PM, shejialuo wrote:
> Add refs-related options to the "fsck_options", create refs-specific
> "error_func" callback "fsck_refs_error_function".
> 
> "fsck_refs_error_function" will use the "oid" parameter. When the caller
> passes the oid, it will use "oid_to_hex" to get the corresponding hex
> value to report to the caller.

Out of curiousity, under what circumstances would the caller want to
also pass the oid? Would it simply be to optionally provide additional
context?

> 
> Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> macros to create refs options easily.
> 
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
>  fsck.c | 23 +++++++++++++++++++++++
>  fsck.h | 15 +++++++++++++++
>  2 files changed, 38 insertions(+)
> 
> diff --git a/fsck.c b/fsck.c
> index e1819964e3..c5c7e8454f 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -1252,6 +1252,29 @@ int fsck_objects_error_function(struct fsck_options *o,
>  	return 1;
>  }
>  
> +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> +			     const struct object_id *oid,
> +			     enum object_type object_type UNUSED,
> +			     const char *checked_ref_name,
> +			     enum fsck_msg_type msg_type,
> +			     enum fsck_msg_id msg_id UNUSED,
> +			     const char *message)
> +{
> +	static struct strbuf sb = STRBUF_INIT;
> +
> +	strbuf_reset(&sb);

Naive question, is there reason to reset `sb` immediately after
`STRBUF_INIT`? My understanding is that because we initialize the
buffer, the other fields should also be zeroed. If so, resetting the
buffer here seems redundant.

> +	strbuf_addstr(&sb, checked_ref_name);
> +	if (oid)
> +		strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
> +
> +	if (msg_type == FSCK_WARN) {
> +		warning("%s: %s", sb.buf, message);
> +		return 0;
> +	}
> +	error("%s: %s", sb.buf, message);
> +	return 1;
> +}
> +
>  static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
>  		      enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
>  		      struct fsck_options *options, const char *blob_type)
> diff --git a/fsck.h b/fsck.h
> index 8ce48395f6..ff52913494 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
>  						   enum fsck_msg_type msg_type,
>  						   enum fsck_msg_id msg_id,
>  						   const char *message);
> +int fsck_refs_error_function(struct fsck_options *options,
> +			     const struct object_id *oid,
> +			     enum object_type object_type,
> +			     const char *checked_ref_name,
> +			     enum fsck_msg_type msg_type,
> +			     enum fsck_msg_id msg_id,
> +			     const char *message);
>  
>  struct fsck_options {
>  	fsck_walk_func walk;
>  	fsck_error error_func;
>  	unsigned strict:1;
> +	unsigned verbose_refs:1;

What is the purpose of adding `verbose_refs` in this patch? At this
point, I'm not seeing it used. If there is a reason to be included in
this patch, it might be nice to mention in the commit message.

>  	enum fsck_msg_type *msg_type;
>  	struct oidset skip_oids;
>  	struct oidset gitmodules_found;
> @@ -173,6 +181,13 @@ struct fsck_options {
>  	.gitattributes_done = OIDSET_INIT, \
>  	.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
>  }
> +#define FSCK_REFS_OPTIONS_DEFAULT { \
> +	.error_func = fsck_refs_error_function, \
> +}
> +#define FSCK_REFS_OPTIONS_STRICT { \
> +	.strict = 1, \
> +	.error_func = fsck_refs_error_function, \
> +}
>  
>  /* descend in all linked child objects
>   * the return value is:
> -- 
> 2.45.2
>
Eric Sunshine July 9, 2024, 9:40 p.m. UTC | #2
On Tue, Jul 9, 2024 at 5:30 PM Justin Tobler <jltobler@gmail.com> wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> > +                          const struct object_id *oid,
> > +                          enum object_type object_type UNUSED,
> > +                          const char *checked_ref_name,
> > +                          enum fsck_msg_type msg_type,
> > +                          enum fsck_msg_id msg_id UNUSED,
> > +                          const char *message)
> > +{
> > +     static struct strbuf sb = STRBUF_INIT;
> > +
> > +     strbuf_reset(&sb);
>
> Naive question, is there reason to reset `sb` immediately after
> `STRBUF_INIT`? My understanding is that because we initialize the
> buffer, the other fields should also be zeroed. If so, resetting the
> buffer here seems redundant.

This particular strbuf is static, so it needs to be cleared each time
the function is called.

The cover letter provides an argument for making it static: that this
will be called often, and we don't want to make a lot of repeated
allocations. Personally, I find that argument rather weak. Why would
an error function be called frequently? Is this really a hot path that
needs to worry about a few extra allocations? Also, importantly, every
static added makes the code harder to "libify", so making it static
requires a very strong reason, but there doesn't seem to be such a
reason in this case.
shejialuo July 10, 2024, 12:13 p.m. UTC | #3
On Tue, Jul 09, 2024 at 05:40:08PM -0400, Eric Sunshine wrote:
> On Tue, Jul 9, 2024 at 5:30 PM Justin Tobler <jltobler@gmail.com> wrote:
> > On 24/07/09 08:35PM, shejialuo wrote:
> > > +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> > > +                          const struct object_id *oid,
> > > +                          enum object_type object_type UNUSED,
> > > +                          const char *checked_ref_name,
> > > +                          enum fsck_msg_type msg_type,
> > > +                          enum fsck_msg_id msg_id UNUSED,
> > > +                          const char *message)
> > > +{
> > > +     static struct strbuf sb = STRBUF_INIT;
> > > +
> > > +     strbuf_reset(&sb);
> >
> > Naive question, is there reason to reset `sb` immediately after
> > `STRBUF_INIT`? My understanding is that because we initialize the
> > buffer, the other fields should also be zeroed. If so, resetting the
> > buffer here seems redundant.
> 
> This particular strbuf is static, so it needs to be cleared each time
> the function is called.
> 
> The cover letter provides an argument for making it static: that this
> will be called often, and we don't want to make a lot of repeated
> allocations. Personally, I find that argument rather weak. Why would
> an error function be called frequently? Is this really a hot path that
> needs to worry about a few extra allocations? Also, importantly, every
> static added makes the code harder to "libify", so making it static
> requires a very strong reason, but there doesn't seem to be such a
> reason in this case.

I didn't consider the issue of libify. I just want to reduce some memory
allocations. I will change this in the next version.

Thanks,
Jialuo
shejialuo July 10, 2024, 12:28 p.m. UTC | #4
On Tue, Jul 09, 2024 at 04:29:24PM -0500, Justin Tobler wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > Add refs-related options to the "fsck_options", create refs-specific
> > "error_func" callback "fsck_refs_error_function".
> > 
> > "fsck_refs_error_function" will use the "oid" parameter. When the caller
> > passes the oid, it will use "oid_to_hex" to get the corresponding hex
> > value to report to the caller.
> 
> Out of curiousity, under what circumstances would the caller want to
> also pass the oid? Would it simply be to optionally provide additional
> context?
> 

Because when we check the refs, we will use "parse_loose_ref_contents"
here to check the ref contents. Below is the prototype:

  int parse_loose_ref_contents(const char *buf,
                               struct object_id *oid,
                               ...)

So we could get a oid here. However, we don't know the type of the oid.
It may not be commit object but rather a tag object. And I want to
provide more flexible operations for caller. When caller passes the oid.
The message could be the following:

  ref_name -> (oid) : fsck_error_type: user-passed message.

So, actually we have provided additional context for the caller. From
another perspective, the object check needs the "oid" parameter, we
cannot remove it from the callback "error_func" prototype. So why not
just reuse this parameter? It truly provides the caller more flexibility
without big changes.

> >  struct fsck_options {
> >  	fsck_walk_func walk;
> >  	fsck_error error_func;
> >  	unsigned strict:1;
> > +	unsigned verbose_refs:1;
> 
> What is the purpose of adding `verbose_refs` in this patch? At this
> point, I'm not seeing it used. If there is a reason to be included in
> this patch, it might be nice to mention in the commit message.
> 

The reason is that fsck builtin handles the object check but we want to
use "git refs verify" command to handle ref check and we put all the
real functionality into each file backend. And there is only one entry
point in the "git refs verify" command. So we need to use "fsck_options"
as the parameter to maintain this state across the ref checks.

Actually, "git-fsck(1)" maintains "verbose" global variable. I think I
should not add this option in this patch which may cause a lot of
confusion here. I will improve this in the next version.

> >  	enum fsck_msg_type *msg_type;
> >  	struct oidset skip_oids;
> >  	struct oidset gitmodules_found;
> > @@ -173,6 +181,13 @@ struct fsck_options {
> >  	.gitattributes_done = OIDSET_INIT, \
> >  	.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> >  }
> > +#define FSCK_REFS_OPTIONS_DEFAULT { \
> > +	.error_func = fsck_refs_error_function, \
> > +}
> > +#define FSCK_REFS_OPTIONS_STRICT { \
> > +	.strict = 1, \
> > +	.error_func = fsck_refs_error_function, \
> > +}
> >  
> >  /* descend in all linked child objects
> >   * the return value is:
> > -- 
> > 2.45.2
> >
diff mbox series

Patch

diff --git a/fsck.c b/fsck.c
index e1819964e3..c5c7e8454f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1252,6 +1252,29 @@  int fsck_objects_error_function(struct fsck_options *o,
 	return 1;
 }
 
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+			     const struct object_id *oid,
+			     enum object_type object_type UNUSED,
+			     const char *checked_ref_name,
+			     enum fsck_msg_type msg_type,
+			     enum fsck_msg_id msg_id UNUSED,
+			     const char *message)
+{
+	static struct strbuf sb = STRBUF_INIT;
+
+	strbuf_reset(&sb);
+	strbuf_addstr(&sb, checked_ref_name);
+	if (oid)
+		strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+	if (msg_type == FSCK_WARN) {
+		warning("%s: %s", sb.buf, message);
+		return 0;
+	}
+	error("%s: %s", sb.buf, message);
+	return 1;
+}
+
 static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
 		      enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
 		      struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 8ce48395f6..ff52913494 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,11 +135,19 @@  int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
 						   enum fsck_msg_type msg_type,
 						   enum fsck_msg_id msg_id,
 						   const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+			     const struct object_id *oid,
+			     enum object_type object_type,
+			     const char *checked_ref_name,
+			     enum fsck_msg_type msg_type,
+			     enum fsck_msg_id msg_id,
+			     const char *message);
 
 struct fsck_options {
 	fsck_walk_func walk;
 	fsck_error error_func;
 	unsigned strict:1;
+	unsigned verbose_refs:1;
 	enum fsck_msg_type *msg_type;
 	struct oidset skip_oids;
 	struct oidset gitmodules_found;
@@ -173,6 +181,13 @@  struct fsck_options {
 	.gitattributes_done = OIDSET_INIT, \
 	.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
 }
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+	.error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+	.strict = 1, \
+	.error_func = fsck_refs_error_function, \
+}
 
 /* descend in all linked child objects
  * the return value is: