diff mbox series

[GSoC,v13,08/10] files-backend: add unified interface for refs scanning

Message ID ZqeYw-k-MzhPTNRf@ArchLinux (mailing list archive)
State Superseded
Headers show
Series ref consistency check infra setup | expand

Commit Message

shejialuo July 29, 2024, 1:27 p.m. UTC
For refs and reflogs, we need to scan its corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
 refs/files-backend.c | 74 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

Comments

Patrick Steinhardt July 30, 2024, 8:31 a.m. UTC | #1
On Mon, Jul 29, 2024 at 09:27:31PM +0800, shejialuo wrote:
> For refs and reflogs, we need to scan its corresponding directories to
> check every regular file or symbolic link which shares the same pattern.
> Introduce a unified interface for scanning directories for
> files-backend.
> 
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
>  refs/files-backend.c | 74 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 73 insertions(+), 1 deletion(-)
> 
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index 4630eb1f80..cb184953c1 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -6,6 +6,7 @@
>  #include "../gettext.h"
>  #include "../hash.h"
>  #include "../hex.h"
> +#include "../fsck.h"
>  #include "../refs.h"
>  #include "refs-internal.h"
>  #include "ref-cache.h"
> @@ -3408,13 +3409,84 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
>  	return ret;
>  }
>  
> +/*
> + * For refs and reflogs, they share a unified interface when scanning
> + * the whole directory. This function is used as the callback for each
> + * regular file or symlink in the directory.
> + */
> +typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
> +				  const char *gitdir,
> +				  const char *refs_check_dir,
> +				  struct dir_iterator *iter);
> +
> +static int files_fsck_refs_dir(struct ref_store *ref_store,
> +			       struct fsck_options *o,
> +			       const char *refs_check_dir,
> +			       files_fsck_refs_fn *fsck_refs_fns)
> +{
> +	const char *gitdir = ref_store->gitdir;
> +	struct strbuf sb = STRBUF_INIT;
> +	struct dir_iterator *iter;
> +	int iter_status;
> +	int ret = 0;
> +
> +	strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
> +
> +	iter = dir_iterator_begin(sb.buf, 0);
> +
> +	if (!iter) {
> +		ret = error_errno("cannot open directory %s", sb.buf);
> +		goto out;
> +	}

The error message should probably be marked as translatable. Also, I'd
personally remove the newline between `iter = ...` and the error check
as those are a logical unit.

> +	while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
> +		if (S_ISDIR(iter->st.st_mode)) {
> +			continue;
> +		} else if (S_ISREG(iter->st.st_mode) ||
> +			   S_ISLNK(iter->st.st_mode)) {
> +			if (o->verbose)
> +				fprintf_ln(stderr, "Checking %s/%s",
> +					   refs_check_dir, iter->relative_path);

Okay, we do end up using the `verbose` flag :)

> +			for (size_t i = 0; fsck_refs_fns[i]; i++) {
> +				if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
> +					ret = -1;
> +			}
> +		} else {
> +			ret = error(_("unexpected file type for '%s'"),
> +				    iter->basename);

Instead of printing this as an error directly, shouldn't we report it
via the `fsck_refs_report` interface?

> +		}
> +	}

Okay. It does make sense to do our own directory walk as that will allow
us to check files which would otherwise not be reported by the normal
refs interfaces.

> +	if (iter_status != ITER_DONE)
> +		ret = error(_("failed to iterate over '%s'"), sb.buf);

Reporting this as an error feels sensible though as we have no ref to
tie this error to, and it feels like a generic error.

> +out:
> +	strbuf_release(&sb);
> +	return ret;
> +}
> +
> +static int files_fsck_refs(struct ref_store *ref_store,
> +			   struct fsck_options *o)
> +{
> +	files_fsck_refs_fn fsck_refs_fns[]= {
> +		NULL

The last member should also end with a comma.

> +	};
> +
> +	if (o->verbose)
> +		fprintf_ln(stderr, "Checking references consistency");
> +
> +	return files_fsck_refs_dir(ref_store, o,  "refs", fsck_refs_fns);
> +

This newline should be removed.

> +}
> +
>  static int files_fsck(struct ref_store *ref_store,
>  		      struct fsck_options *o)
>  {
>  	struct files_ref_store *refs =
>  		files_downcast(ref_store, REF_STORE_READ, "fsck");
>  
> -	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
> +	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
> +	       files_fsck_refs(ref_store, o);

I'd think we should first check loose files and then continue to check
the packed ref store. That's really only a gut feeling though, and I
cannot exactly say why that feels more natural.

Patrick
shejialuo July 30, 2024, 4:10 p.m. UTC | #2
> > +	iter = dir_iterator_begin(sb.buf, 0);
> > +
> > +	if (!iter) {
> > +		ret = error_errno("cannot open directory %s", sb.buf);
> > +		goto out;
> > +	}
> 
> The error message should probably be marked as translatable. Also, I'd
> personally remove the newline between `iter = ...` and the error check
> as those are a logical unit.
> 

Yes, I will improve this in the next version.

> > +			for (size_t i = 0; fsck_refs_fns[i]; i++) {
> > +				if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
> > +					ret = -1;
> > +			}
> > +		} else {
> > +			ret = error(_("unexpected file type for '%s'"),
> > +				    iter->basename);
> 
> Instead of printing this as an error directly, shouldn't we report it
> via the `fsck_refs_report` interface?
> 

Yes, exactly we should use this interface. I accidentally ignored this.
Thanks.

> > +out:
> > +	strbuf_release(&sb);
> > +	return ret;
> > +}
> > +
> > +static int files_fsck_refs(struct ref_store *ref_store,
> > +			   struct fsck_options *o)
> > +{
> > +	files_fsck_refs_fn fsck_refs_fns[]= {
> > +		NULL
> 
> The last member should also end with a comma.
> 

I will improve this in the next version.

> > +	};
> > +
> > +	if (o->verbose)
> > +		fprintf_ln(stderr, "Checking references consistency");
> > +
> > +	return files_fsck_refs_dir(ref_store, o,  "refs", fsck_refs_fns);
> > +
> 
> This newline should be removed.
> 

OK.

> > +}
> > +
> >  static int files_fsck(struct ref_store *ref_store,
> >  		      struct fsck_options *o)
> >  {
> >  	struct files_ref_store *refs =
> >  		files_downcast(ref_store, REF_STORE_READ, "fsck");
> >  
> > -	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
> > +	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
> > +	       files_fsck_refs(ref_store, o);
> 
> I'd think we should first check loose files and then continue to check
> the packed ref store. That's really only a gut feeling though, and I
> cannot exactly say why that feels more natural.
> 

It would feel more natural. Because packed-ref will point to the
loose ref. So we should first check the loose refs. For example. If a
regular ref is bad, we will first report the problem. And suppose the
packed-ref have recorded this regular ref. When checking packed-ref, we
could not check the regular ref itself. We only need to check one thing.

  Whether the pointee exists under the "refs/" directory.

And we do not need to check regular ref again because we have checked in
the loose refs part.

> Patrick
diff mbox series

Patch

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 4630eb1f80..cb184953c1 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@ 
 #include "../gettext.h"
 #include "../hash.h"
 #include "../hex.h"
+#include "../fsck.h"
 #include "../refs.h"
 #include "refs-internal.h"
 #include "ref-cache.h"
@@ -3408,13 +3409,84 @@  static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
 	return ret;
 }
 
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+				  const char *gitdir,
+				  const char *refs_check_dir,
+				  struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+			       struct fsck_options *o,
+			       const char *refs_check_dir,
+			       files_fsck_refs_fn *fsck_refs_fns)
+{
+	const char *gitdir = ref_store->gitdir;
+	struct strbuf sb = STRBUF_INIT;
+	struct dir_iterator *iter;
+	int iter_status;
+	int ret = 0;
+
+	strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+	iter = dir_iterator_begin(sb.buf, 0);
+
+	if (!iter) {
+		ret = error_errno("cannot open directory %s", sb.buf);
+		goto out;
+	}
+
+	while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+		if (S_ISDIR(iter->st.st_mode)) {
+			continue;
+		} else if (S_ISREG(iter->st.st_mode) ||
+			   S_ISLNK(iter->st.st_mode)) {
+			if (o->verbose)
+				fprintf_ln(stderr, "Checking %s/%s",
+					   refs_check_dir, iter->relative_path);
+			for (size_t i = 0; fsck_refs_fns[i]; i++) {
+				if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+					ret = -1;
+			}
+		} else {
+			ret = error(_("unexpected file type for '%s'"),
+				    iter->basename);
+		}
+	}
+
+	if (iter_status != ITER_DONE)
+		ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+	strbuf_release(&sb);
+	return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+			   struct fsck_options *o)
+{
+	files_fsck_refs_fn fsck_refs_fns[]= {
+		NULL
+	};
+
+	if (o->verbose)
+		fprintf_ln(stderr, "Checking references consistency");
+
+	return files_fsck_refs_dir(ref_store, o,  "refs", fsck_refs_fns);
+
+}
+
 static int files_fsck(struct ref_store *ref_store,
 		      struct fsck_options *o)
 {
 	struct files_ref_store *refs =
 		files_downcast(ref_store, REF_STORE_READ, "fsck");
 
-	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+	return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
+	       files_fsck_refs(ref_store, o);
 }
 
 struct ref_storage_be refs_be_files = {