mbox series

[GSoC,v5,00/12] ref consistency check infra setup

Message ID Zn2Ah3WDhtOmzrzn@ArchLinux (mailing list archive)
Headers show
Series ref consistency check infra setup | expand

Message

shejialuo June 27, 2024, 3:08 p.m. UTC
Hi All, this version have been changed a lot compared with the v4. The
mainly changed part is how to integrate the ref checks into the current
git-fsck(1) which concentrates on the object database check.

In the previous version, I design the following structures:

  struct fsck_options {
    struct fsck_refs_options refs_options;
    struct fsck_objs_options objs_options;
    enum fsck_msg_type *msg_type;
    unsigned strict:1,
             verbose:1;
  };

Although Junio said It's OK that therea are sub-structures in
"fsck_options", I decide not to use the above design. This is because in
the current implementation, we either check refs or objects. The above
design is a combination. It will give us the feeling that "fsck_options"
is used to check both refs and objects.

So I decide to create two new structs "fsck_objects_options" and
"fsck_refs_options". So we need to think about how to design this. In my
implementation, I totally change the "fsck_options" to be a general
options which "fsck_objects_options" and "fsck_refs_options" will
incorporate.

However, there are some options related to configs such as
"enum fsck_msg_type *msg_type" and "skiplist". I change them into the
"fskc.c" as the static variable for singleton pattern.

And then extract the common options into "fsck_options". Thus we could
reuse some interfaces when chekcing refs or objects.

At last, solve the review from Junio. Junio hopes that we could use a
unified interface to report fsck error messages. This version solves
this.

BTY, this patch is based on the commit:

  1e1586e4ed (The sixteenth batch, 2024-06-24)

Thanks all

shejialuo (12):
  fsck: rename "fsck_options" to "fsck_objects_options"
  fsck: use "fsck_configs" to set up configs
  fsck: abstract common options for reusing
  fsck: add "fsck_refs_options" struct
  fsck: add a unified interface for reporting fsck messages
  fsck: add "fsck_refs_options" initialization macros
  refs: set up ref consistency check infrastructure
  builtin/refs: add verify subcommand
  builtin/fsck: add `git-refs verify` child process
  files-backend: add unified interface for refs scanning
  fsck: add ref name check for files backend
  fsck: add ref content check for files backend

 Documentation/fsck-msgids.txt |  12 ++
 Documentation/git-refs.txt    |  13 ++
 builtin/fsck.c                |  38 +++--
 builtin/index-pack.c          |  18 +--
 builtin/mktag.c               |  15 +-
 builtin/refs.c                |  36 +++++
 builtin/unpack-objects.c      |  14 +-
 fetch-pack.c                  |  12 +-
 fsck.c                        | 197 +++++++++++++++++++-------
 fsck.h                        | 122 ++++++++++++----
 object-file.c                 |  20 +--
 refs.c                        |   7 +-
 refs.h                        |   8 ++
 refs/debug.c                  |  11 ++
 refs/files-backend.c          | 255 +++++++++++++++++++++++++++++++++-
 refs/packed-backend.c         |   8 ++
 refs/refs-internal.h          |  11 +-
 refs/reftable-backend.c       |   8 ++
 t/t0602-reffiles-fsck.sh      | 211 ++++++++++++++++++++++++++++
 19 files changed, 889 insertions(+), 127 deletions(-)
 create mode 100755 t/t0602-reffiles-fsck.sh

Range-diff against v4:
 1:  2d2c1f2cd3 <  -:  ---------- fsck: add refs check interfaces to interact with fsck error levels
 -:  ---------- >  1:  d83b5797cb fsck: rename "fsck_options" to "fsck_objects_options"
 -:  ---------- >  2:  88000b092a fsck: use "fsck_configs" to set up configs
 -:  ---------- >  3:  65093bae64 fsck: abstract common options for reusing
 -:  ---------- >  4:  403750da30 fsck: add "fsck_refs_options" struct
 -:  ---------- >  5:  8dea5654d5 fsck: add a unified interface for reporting fsck messages
 -:  ---------- >  6:  8fb62b54c6 fsck: add "fsck_refs_options" initialization macros
 2:  4bacffe44e !  7:  2ecd1f5407 refs: set up ref consistency check infrastructure
    @@ refs.c: int check_refname_format(const char *refname, int flags)
      	return check_or_sanitize_refname(refname, flags, NULL);
      }
      
    -+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
    ++int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o)
     +{
     +	return refs->be->fsck(refs, o);
     +}
    @@ refs.h
      
      #include "commit.h"
      
    -+struct fsck_options;
    ++struct fsck_refs_options;
      struct object_id;
      struct ref_store;
      struct repository;
    @@ refs.h: int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void
     + * reflogs are consistent, and non-zero otherwise. The errors will be
     + * written to stderr.
     + */
    -+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
    ++int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o);
     +
      /*
       * Apply the rules from check_refname_format, but mutate the result until it
    @@ refs/debug.c: static int debug_reflog_expire(struct ref_store *ref_store, const
      }
      
     +static int debug_fsck(struct ref_store *ref_store,
    -+		      struct fsck_options *o)
    ++		      struct fsck_refs_options *o)
     +{
     +	struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
     +	int res = drefs->refs->be->fsck(drefs->refs, o);
    @@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
      }
      
     +static int files_fsck(struct ref_store *ref_store,
    -+		      struct fsck_options *o)
    ++		      struct fsck_refs_options *o)
     +{
     +	int ret;
     +	struct files_ref_store *refs =
    @@ refs/packed-backend.c: static struct ref_iterator *packed_reflog_iterator_begin(
      }
      
     +static int packed_fsck(struct ref_store *ref_store,
    -+		       struct fsck_options *o)
    ++		       struct fsck_refs_options *o)
     +{
     +	return 0;
     +}
    @@ refs/refs-internal.h: typedef int read_raw_ref_fn(struct ref_store *ref_store, c
      				 struct strbuf *referent);
      
     +typedef int fsck_fn(struct ref_store *ref_store,
    -+		    struct fsck_options *o);
    ++		    struct fsck_refs_options *o);
     +
      struct ref_storage_be {
      	const char *name;
    @@ refs/reftable-backend.c: static int reftable_be_reflog_expire(struct ref_store *
      }
      
     +static int reftable_be_fsck(struct ref_store *ref_store,
    -+			    struct fsck_options *o)
    ++			    struct fsck_refs_options *o)
     +{
     +	return 0;
     +}
 3:  f00acab4d1 !  8:  534a3d00af builtin/refs: add verify subcommand
    @@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
      
     +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
     +{
    ++	struct fsck_refs_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
     +	const char * const verify_usage[] = {
     +		REFS_VERIFY_USAGE,
     +		NULL,
     +	};
    -+	int ret = 0;
     +	unsigned int verbose = 0, strict = 0;
    -+	struct fsck_options fsck_options = FSCK_OPTIONS_DEFAULT;
     +	struct option options[] = {
     +		OPT__VERBOSE(&verbose, N_("be verbose")),
     +		OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
    @@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
     +		usage(_("too many arguments"));
     +
     +	if (verbose)
    -+		fsck_options.verbose = 1;
    ++		fsck_refs_options.fsck_options.verbose = 1;
     +	if (strict)
    -+		fsck_options.strict = 1;
    ++		fsck_refs_options.fsck_options.strict = 1;
     +
    -+	git_config(git_fsck_config, &fsck_options);
    ++	git_config(git_fsck_config, &fsck_refs_options.fsck_options);
     +	prepare_repo_settings(the_repository);
     +
    -+	ret = refs_fsck(get_main_ref_store(the_repository), &fsck_options);
    -+
    -+	/*
    -+	 * Explicitly free the allocated array and object skiplist set. Because
    -+	 * we reuse `git_fsck_config` here. It will still set the skiplist.
    -+	 */
    -+	free(fsck_options.msg_type);
    -+	oidset_clear(&fsck_options.objs_options.skiplist);
    -+	return ret;
    ++	return refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
     +}
     +
      int cmd_refs(int argc, const char **argv, const char *prefix)
 4:  6494f5b0c2 =  9:  054c8cea89 builtin/fsck: add `git-refs verify` child process
 5:  2c064f4f71 ! 10:  7f87c6a26e files-backend: add unified interface for refs scanning
    @@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
     + * the whole directory. This function is used as the callback for each
     + * regular file or symlink in the directory.
     + */
    -+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
    ++typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
     +				  const char *gitdir,
     +				  const char *refs_check_dir,
     +				  struct dir_iterator *iter);
     +
     +static int files_fsck_refs_dir(struct ref_store *ref_store,
    -+			       struct fsck_options *o,
    ++			       struct fsck_refs_options *o,
     +			       const char *refs_check_dir,
     +			       files_fsck_refs_fn *fsck_refs_fns)
     +{
    @@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
     +			continue;
     +		} else if (S_ISREG(iter->st.st_mode) ||
     +			   S_ISLNK(iter->st.st_mode)) {
    -+			if (o->verbose)
    ++			if (o->fsck_options.verbose)
     +				fprintf_ln(stderr, "Checking %s/%s",
     +					   refs_check_dir, iter->relative_path);
     +			for (size_t i = 0; fsck_refs_fns[i]; i++) {
    @@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
     +}
     +
     +static int files_fsck_refs(struct ref_store *ref_store,
    -+			   struct fsck_options *o)
    ++			   struct fsck_refs_options *o)
     +{
     +	int ret;
     +	files_fsck_refs_fn fsck_refs_fns[]= {
     +		NULL
     +	};
     +
    -+	if (o->verbose)
    ++	if (o->fsck_options.verbose)
     +		fprintf_ln(stderr, "Checking references consistency");
     +
     +	ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
    @@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
     +}
     +
      static int files_fsck(struct ref_store *ref_store,
    - 		      struct fsck_options *o)
    + 		      struct fsck_refs_options *o)
      {
     @@ refs/files-backend.c: static int files_fsck(struct ref_store *ref_store,
      	struct files_ref_store *refs =
 6:  88b54b9e1a ! 11:  7d78014e5f fsck: add ref name check for files backend
    @@ fsck.h: enum fsck_msg_type {
      	FUNC(BAD_TREE_SHA1, ERROR) \
     
      ## refs/files-backend.c ##
    -@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
    +@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
      				  const char *refs_check_dir,
      				  struct dir_iterator *iter);
      
    -+static int files_fsck_refs_name(struct fsck_options *o,
    ++static int files_fsck_refs_name(struct fsck_refs_options *o,
     +				const char *gitdir UNUSED,
     +				const char *refs_check_dir,
     +				struct dir_iterator *iter)
    @@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
     +
     +	if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
     +		strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
    -+		ret = fsck_refs_report(o, sb.buf,
    ++		ret = fsck_refs_report(o, NULL, sb.buf,
     +				       FSCK_MSG_BAD_REF_NAME,
     +				       "invalid refname format");
     +	}
    @@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
     +}
     +
      static int files_fsck_refs_dir(struct ref_store *ref_store,
    - 			       struct fsck_options *o,
    + 			       struct fsck_refs_options *o,
      			       const char *refs_check_dir,
     @@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
      {
 7:  8309a4746a ! 12:  16209a73ed fsck: add ref content check for files backend
    @@ refs/files-backend.c: int parse_loose_ref_contents(const char *buf, struct objec
      	return 0;
      }
      
    -@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
    +@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_refs_options *o,
      	return ret;
      }
      
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     + * would be the content after "refs:". For symblic link, "pointee_name" would
     + * be the relative path agaignst "gitdir".
     + */
    -+static int files_fsck_symref_target(struct fsck_options *o,
    ++static int files_fsck_symref_target(struct fsck_refs_options *o,
     +				    const char *refname,
     +				    const char *pointee_name,
     +				    const char *pointee_path)
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +
     +	if (!skip_prefix(pointee_name, "refs/", &p)) {
     +
    -+		ret = fsck_refs_report(o, refname,
    ++		ret = fsck_refs_report(o, NULL, refname,
     +				       FSCK_MSG_BAD_SYMREF_POINTEE,
     +				       "point to target out of refs hierarchy");
     +		goto out;
     +	}
     +
     +	if (check_refname_format(pointee_name, 0)) {
    -+		ret = fsck_refs_report(o, refname,
    ++		ret = fsck_refs_report(o, NULL, refname,
     +				       FSCK_MSG_BAD_SYMREF_POINTEE,
     +				       "point to invalid refname");
     +	}
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +		goto out;
     +
     +	if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
    -+		ret = fsck_refs_report(o, refname,
    ++		ret = fsck_refs_report(o, NULL, refname,
     +				       FSCK_MSG_BAD_SYMREF_POINTEE,
     +				       "point to invalid target");
     +		goto out;
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +	return ret;
     +}
     +
    -+static int files_fsck_refs_content(struct fsck_options *o,
    ++static int files_fsck_refs_content(struct fsck_refs_options *o,
     +				   const char *gitdir,
     +				   const char *refs_check_dir,
     +				   struct dir_iterator *iter)
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +
     +		if (!skip_prefix(pointee_path.buf,
     +				 abs_gitdir.buf, &pointee_name)) {
    -+			ret = fsck_refs_report(o, refname.buf,
    ++			ret = fsck_refs_report(o, NULL, refname.buf,
     +					       FSCK_MSG_BAD_SYMREF_POINTEE,
     +					       "point to target outside gitdir");
     +			goto clean;
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +	if (parse_loose_ref_contents(ref_content.buf, &oid,
     +				     &referent, &type,
     +				     &failure_errno, &trailing)) {
    -+		ret = fsck_refs_report(o, refname.buf,
    ++		ret = fsck_refs_report(o, NULL, refname.buf,
     +				       FSCK_MSG_BAD_REF_CONTENT,
     +				       "invalid ref content");
     +		goto clean;
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +		 * be reported as a warning.
     +		 */
     +		if (trailing && (*trailing != '\0' && *trailing != '\n')) {
    -+			ret = fsck_refs_report(o, refname.buf,
    ++			ret = fsck_refs_report(o, NULL, refname.buf,
     +					       FSCK_MSG_TRAILING_REF_CONTENT,
     +					       "trailing garbage in ref");
     +			goto clean;
    @@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
     +}
     +
      static int files_fsck_refs_dir(struct ref_store *ref_store,
    - 			       struct fsck_options *o,
    + 			       struct fsck_refs_options *o,
      			       const char *refs_check_dir,
     @@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
      	int ret;