diff mbox series

[GSoC,v5,02/12] fsck: use "fsck_configs" to set up configs

Message ID Zn2BgEw0geZwvr3_@ArchLinux (mailing list archive)
State New
Headers show
Series ref consistency check infra setup | expand

Commit Message

shejialuo June 27, 2024, 3:13 p.m. UTC
Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
are not options, these fields are related to "git-config(1)" which are
initialized using "git_fsck_config" function.  Create a static variable
named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
don't need to reply on the "fsck_objects_options" to set up the fsck
error message severity.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
 fsck.c | 29 ++++++++++++++++++-----------
 fsck.h |  3 ---
 2 files changed, 18 insertions(+), 14 deletions(-)

Comments

Junio C Hamano June 27, 2024, 9:43 p.m. UTC | #1
shejialuo <shejialuo@gmail.com> writes:

> Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
> are not options, these fields are related to "git-config(1)" which are
> initialized using "git_fsck_config" function.  Create a static variable
> named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
> don't need to reply on the "fsck_objects_options" to set up the fsck
> error message severity.

reply???

As configuration often is used to prepopulate options, I need a lot
stonger justification to split these into a different structure than
"'config' is a noun that is different from a noun 'option'".  

If we intend to have many "option" instances but what these two
members store will be the same across these "option" instances, for
example, that would be a lot better reason why we may want to
separate these two members out of it, but I have a suspicion that if
we were to use the "union with tags" approach, these two would
become members of the common part shared between "objects' and
"refs", i.e. the overall data structure might look more like this:

	struct fsck_options {
		enum fsck_msg_type *msg_type;
		struct oidset oid_skiplist;
		enum fsck_what_check { O, R } tag;
		union {
			struct fsck_object_options o;
			struct fsck_ref_options r;
		} u;
	};

by moving these two members out of fsck_object_options and moving
them to the shared part.

I dunno.  It is unclear what the real reason is for these two things
to be extracted out and made to appear totally independent from the
"options" thing to begin with, and I am not sure if I agree with the
reason when it is known.
shejialuo June 28, 2024, 4:22 a.m. UTC | #2
On Thu, Jun 27, 2024 at 02:43:47PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
> 
> > Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
> > are not options, these fields are related to "git-config(1)" which are
> > initialized using "git_fsck_config" function.  Create a static variable
> > named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
> > don't need to reply on the "fsck_objects_options" to set up the fsck
> > error message severity.
> 
> reply???
> 

Sorry, I often make mistake to type "rely" as "reply".

> As configuration often is used to prepopulate options, I need a lot
> stonger justification to split these into a different structure than
> "'config' is a noun that is different from a noun 'option'".  
> 
> If we intend to have many "option" instances but what these two
> members store will be the same across these "option" instances, for
> example, that would be a lot better reason why we may want to
> separate these two members out of it, but I have a suspicion that if
> we were to use the "union with tags" approach, these two would
> become members of the common part shared between "objects' and
> "refs", i.e. the overall data structure might look more like this:
> 

Actually, I feel really wired for this part. Let me elaborate on this.
"fsck.c::git_fsck_config()" is used to set up the configs. It will
eventually call the "fsck.c::fsck_set_msg_type_from_ids" like the
following:

  void fsck_set_msg_type_from_ids(struct fsck_options *options,
                                  enum fsck_msg_id msg_id,
                                  enum fsck_msg_type msg_type)
  {
    if (!options->msg_type) {
      int i;
      enum fsck_msg_type *severity.
      ALLOC_ARRAY(severity, FSCK_MSG_MAX);
      for (i = 0; i < FSCK_MSG_MAX; i++)
        severity[i] = fsck_msg_type(i, options);
      options->msg_type = severity;
    }

   options->msg_type[msg_id] = msg_type;
  }

In the current codebase, the caller will simply create a "fsck_options"
and setup the fsck error message severity. However, let's see
"builtin/fskc.c", it creates the following two "fsck_options" and it
only uses

  static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
  static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;

However, the code only uses "fsck_obj_options" to setup the configs. So
it makes me feel so strange. So I just want to make it separation. Maybe
a little wrong here.

> 	struct fsck_options {
> 		enum fsck_msg_type *msg_type;
> 		struct oidset oid_skiplist;
> 		enum fsck_what_check { O, R } tag;
> 		union {
> 			struct fsck_object_options o;
> 			struct fsck_ref_options r;
> 		} u;
> 	};
> 
> by moving these two members out of fsck_object_options and moving
> them to the shared part.
> 
> I dunno.  It is unclear what the real reason is for these two things
> to be extracted out and made to appear totally independent from the
> "options" thing to begin with, and I am not sure if I agree with the
> reason when it is known.
diff mbox series

Patch

diff --git a/fsck.c b/fsck.c
index c24a0f9fae..81b93f02fc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -24,6 +24,14 @@ 
 
 static ssize_t max_tree_entry_len = 4096;
 
+static struct {
+	enum fsck_msg_type *msg_type;
+	struct oidset oid_skiplist;
+} fsck_configs = {
+	.msg_type = NULL,
+	.oid_skiplist = OIDSET_INIT
+};
+
 #define STR(x) #x
 #define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
 static struct {
@@ -103,7 +111,7 @@  static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
 {
 	assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
 
-	if (!options->msg_type) {
+	if (!fsck_configs.msg_type) {
 		enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
 
 		if (options->strict && msg_type == FSCK_WARN)
@@ -111,7 +119,7 @@  static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
 		return msg_type;
 	}
 
-	return options->msg_type[msg_id];
+	return fsck_configs.msg_type[msg_id];
 }
 
 static enum fsck_msg_type parse_msg_type(const char *str)
@@ -138,16 +146,16 @@  void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
 				enum fsck_msg_id msg_id,
 				enum fsck_msg_type msg_type)
 {
-	if (!options->msg_type) {
+	if (!fsck_configs.msg_type) {
 		int i;
 		enum fsck_msg_type *severity;
 		ALLOC_ARRAY(severity, FSCK_MSG_MAX);
 		for (i = 0; i < FSCK_MSG_MAX; i++)
 			severity[i] = fsck_msg_type(i, options);
-		options->msg_type = severity;
+		fsck_configs.msg_type = severity;
 	}
 
-	options->msg_type[msg_id] = msg_type;
+	fsck_configs.msg_type[msg_id] = msg_type;
 }
 
 void fsck_set_msg_type(struct fsck_objects_options *options,
@@ -203,7 +211,7 @@  void fsck_set_msg_types(struct fsck_objects_options *options, const char *values
 		if (!strcmp(buf, "skiplist")) {
 			if (equal == len)
 				die("skiplist requires a path");
-			oidset_parse_file(&options->skiplist, buf + equal + 1);
+			oidset_parse_file(&fsck_configs.oid_skiplist, buf + equal + 1);
 			buf += len + 1;
 			continue;
 		}
@@ -217,10 +225,9 @@  void fsck_set_msg_types(struct fsck_objects_options *options, const char *values
 	free(to_free);
 }
 
-static int object_on_skiplist(struct fsck_objects_options *opts,
-			      const struct object_id *oid)
+static int object_on_skiplist(const struct object_id *oid)
 {
-	return opts && oid && oidset_contains(&opts->skiplist, oid);
+	return oid && oidset_contains(&fsck_configs.oid_skiplist, oid);
 }
 
 __attribute__((format (printf, 5, 6)))
@@ -236,7 +243,7 @@  static int report(struct fsck_objects_options *options,
 	if (msg_type == FSCK_IGNORE)
 		return 0;
 
-	if (object_on_skiplist(options, oid))
+	if (object_on_skiplist(oid))
 		return 0;
 
 	if (msg_type == FSCK_FATAL)
@@ -1109,7 +1116,7 @@  static int fsck_blob(const struct object_id *oid, const char *buf,
 {
 	int ret = 0;
 
-	if (object_on_skiplist(options, oid))
+	if (object_on_skiplist(oid))
 		return 0;
 
 	if (oidset_contains(&options->gitmodules_found, oid)) {
diff --git a/fsck.h b/fsck.h
index b64164db17..37deadc4bd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,8 +135,6 @@  struct fsck_objects_options {
 	fsck_walk_func walk;
 	fsck_error error_func;
 	unsigned strict:1;
-	enum fsck_msg_type *msg_type;
-	struct oidset skiplist;
 	struct oidset gitmodules_found;
 	struct oidset gitmodules_done;
 	struct oidset gitattributes_found;
@@ -145,7 +143,6 @@  struct fsck_objects_options {
 };
 
 #define FSCK_OBJECTS_OPTIONS_DEFAULT { \
-	.skiplist = OIDSET_INIT, \
 	.gitmodules_found = OIDSET_INIT, \
 	.gitmodules_done = OIDSET_INIT, \
 	.gitattributes_found = OIDSET_INIT, \