[v2,13/24] builtin/init-db: allow specifying hash algorithm on command line
diff mbox series

Message ID 20200222201749.937983-14-sandals@crustytoothpaste.net
State New
Headers show
Series
  • SHA-256 stage 4 implementation, part 1/3
Related show

Commit Message

brian m. carlson Feb. 22, 2020, 8:17 p.m. UTC
Allow the user to specify the hash algorithm on the command line by
using the --object-format option to git init.  Validate that the user is
not attempting to reinitialize a repository with a different hash
algorithm.  Ensure that if we are writing a non-SHA-1 repository that we
set the repository version to 1 and write the objectFormat extension.

Restrict this option to work only when ENABLE_SHA256 is set until the
codebase is in a situation to fully support this.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 Documentation/git-init.txt |  7 ++++-
 builtin/clone.c            |  2 +-
 builtin/init-db.c          | 52 +++++++++++++++++++++++++++++++++-----
 cache.h                    |  3 ++-
 4 files changed, 55 insertions(+), 9 deletions(-)

Comments

Junio C Hamano Feb. 24, 2020, 6:14 p.m. UTC | #1
"brian m. carlson" <sandals@crustytoothpaste.net> writes:

>  'git init' [-q | --quiet] [--bare] [--template=<template_directory>]
> -	  [--separate-git-dir <git dir>]
> +	  [--separate-git-dir <git dir>] [--object-format=<format]

A missing closing ket> in <bra-ket> pair.

> +#ifndef ENABLE_SHA256
> +	if (fmt->hash_algo != GIT_HASH_SHA1)
> +		die(_("The hash algorithm %s is not supported in this build."), hash_algos[fmt->hash_algo].name);

Could you fold the overlong line here?

>  int init_db(const char *git_dir, const char *real_git_dir,
> -	    const char *template_dir, unsigned int flags)
> +	    const char *template_dir, int hash, unsigned int flags)

Perhaps rename "hash" to "hash_algo"?  I know that it is very
unlikely for a variable whose name is 'hash' to be mistaken as a raw
hash value when its type is "int" (as opposed to say char *), but
still.  I wouldn't be saying this if its type were an "enum
hash_algo" or something like that.

> +	const char *object_format = NULL;
> +	int hash_algo = GIT_HASH_UNKNOWN;

This one _is_ good.

>  	const struct option init_db_options[] = {
>  		OPT_STRING(0, "template", &template_dir, N_("template-directory"),
>  				N_("directory from which templates will be used")),
> @@ -494,6 +526,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
>  		OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET),
>  		OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
>  			   N_("separate git dir from working tree")),
> +		OPT_STRING(0, "object-format", &object_format, N_("hash"),
> +			   N_("specify the hash algorithm to use")),
>  		OPT_END()
>  	};
>  
> @@ -546,6 +580,12 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
>  		free(cwd);
>  	}
>  
> +	if (object_format) {
> +		hash_algo = hash_algo_by_name(object_format);
> +		if (hash_algo == GIT_HASH_UNKNOWN)
> +			die(_("unknown hash algorithm '%s'"), object_format);
> +	}
> +
>  	if (init_shared_repository != -1)
>  		set_shared_repository(init_shared_repository);
>  
> @@ -597,5 +637,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
>  	UNLEAK(work_tree);
>  
>  	flags |= INIT_DB_EXIST_OK;
> -	return init_db(git_dir, real_git_dir, template_dir, flags);
> +	return init_db(git_dir, real_git_dir, template_dir, hash_algo, flags);
>  }
> diff --git a/cache.h b/cache.h
> index 29ee02a8d4..7a47e023ba 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -627,7 +627,8 @@ int path_inside_repo(const char *prefix, const char *path);
>  #define INIT_DB_EXIST_OK 0x0002
>  
>  int init_db(const char *git_dir, const char *real_git_dir,
> -	    const char *template_dir, unsigned int flags);
> +	    const char *template_dir, int hash_algo,

So is this one.

> +	    unsigned int flags);
>  
>  void sanitize_stdfds(void);
>  int daemonize(void);
brian m. carlson Feb. 25, 2020, 12:11 a.m. UTC | #2
On 2020-02-24 at 18:14:33, Junio C Hamano wrote:
> "brian m. carlson" <sandals@crustytoothpaste.net> writes:
> 
> >  'git init' [-q | --quiet] [--bare] [--template=<template_directory>]
> > -	  [--separate-git-dir <git dir>]
> > +	  [--separate-git-dir <git dir>] [--object-format=<format]
> 
> A missing closing ket> in <bra-ket> pair.

Good catch.

> > +#ifndef ENABLE_SHA256
> > +	if (fmt->hash_algo != GIT_HASH_SHA1)
> > +		die(_("The hash algorithm %s is not supported in this build."), hash_algos[fmt->hash_algo].name);
> 
> Could you fold the overlong line here?

Absolutely.

> >  int init_db(const char *git_dir, const char *real_git_dir,
> > -	    const char *template_dir, unsigned int flags)
> > +	    const char *template_dir, int hash, unsigned int flags)
> 
> Perhaps rename "hash" to "hash_algo"?  I know that it is very
> unlikely for a variable whose name is 'hash' to be mistaken as a raw
> hash value when its type is "int" (as opposed to say char *), but
> still.  I wouldn't be saying this if its type were an "enum
> hash_algo" or something like that.

Sure, that's reasonable.

Patch
diff mbox series

diff --git a/Documentation/git-init.txt b/Documentation/git-init.txt
index 32880aafb0..adc6adfd38 100644
--- a/Documentation/git-init.txt
+++ b/Documentation/git-init.txt
@@ -10,7 +10,7 @@  SYNOPSIS
 --------
 [verse]
 'git init' [-q | --quiet] [--bare] [--template=<template_directory>]
-	  [--separate-git-dir <git dir>]
+	  [--separate-git-dir <git dir>] [--object-format=<format]
 	  [--shared[=<permissions>]] [directory]
 
 
@@ -48,6 +48,11 @@  Only print error and warning messages; all other output will be suppressed.
 Create a bare repository. If `GIT_DIR` environment is not set, it is set to the
 current working directory.
 
+--object-format=<format>::
+
+Specify the given object format (hash algorithm) for the repository.  The valid
+values are 'sha1' and (if enabled) 'sha256'.  'sha1' is the default.
+
 --template=<template_directory>::
 
 Specify the directory from which templates will be used.  (See the "TEMPLATE
diff --git a/builtin/clone.c b/builtin/clone.c
index 4f6150c55c..961996a110 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1097,7 +1097,7 @@  int cmd_clone(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	init_db(git_dir, real_git_dir, option_template, INIT_DB_QUIET);
+	init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, INIT_DB_QUIET);
 
 	if (real_git_dir)
 		git_dir = real_git_dir;
diff --git a/builtin/init-db.c b/builtin/init-db.c
index b11f07064d..d05552f0ae 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -177,7 +177,8 @@  static int needs_work_tree_config(const char *git_dir, const char *work_tree)
 }
 
 static int create_default_files(const char *template_path,
-				const char *original_git_dir)
+				const char *original_git_dir,
+				const struct repository_format *fmt)
 {
 	struct stat st1;
 	struct strbuf buf = STRBUF_INIT;
@@ -187,6 +188,7 @@  static int create_default_files(const char *template_path,
 	int reinit;
 	int filemode;
 	struct strbuf err = STRBUF_INIT;
+	int repo_version = GIT_REPO_VERSION;
 
 	/* Just look for `init.templatedir` */
 	init_db_template_dir = NULL; /* re-set in case it was set before */
@@ -244,11 +246,23 @@  static int create_default_files(const char *template_path,
 			exit(1);
 	}
 
+#ifndef ENABLE_SHA256
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		die(_("The hash algorithm %s is not supported in this build."), hash_algos[fmt->hash_algo].name);
+#endif
+
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		repo_version = GIT_REPO_VERSION_READ;
+
 	/* This forces creation of new config file */
 	xsnprintf(repo_version_string, sizeof(repo_version_string),
-		  "%d", GIT_REPO_VERSION);
+		  "%d", repo_version);
 	git_config_set("core.repositoryformatversion", repo_version_string);
 
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		git_config_set("extensions.objectformat",
+			       hash_algos[fmt->hash_algo].name);
+
 	/* Check filemode trustability */
 	path = git_path_buf(&buf, "config");
 	filemode = TEST_FILEMODE;
@@ -340,12 +354,26 @@  static void separate_git_dir(const char *git_dir, const char *git_link)
 	write_file(git_link, "gitdir: %s", git_dir);
 }
 
+static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash)
+{
+	/*
+	 * If we already have an initialized repo, don't allow the user to
+	 * specify a different algorithm, as that could cause corruption.
+	 * Otherwise, if the user has specified one on the command line, use it.
+	 */
+	if (repo_fmt->version >= 0 && hash != GIT_HASH_UNKNOWN && hash != repo_fmt->hash_algo)
+		die(_("attempt to reinitialize repository with different hash"));
+	else if (hash != GIT_HASH_UNKNOWN)
+		repo_fmt->hash_algo = hash;
+}
+
 int init_db(const char *git_dir, const char *real_git_dir,
-	    const char *template_dir, unsigned int flags)
+	    const char *template_dir, int hash, unsigned int flags)
 {
 	int reinit;
 	int exist_ok = flags & INIT_DB_EXIST_OK;
 	char *original_git_dir = real_pathdup(git_dir, 1);
+	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
 
 	if (real_git_dir) {
 		struct stat st;
@@ -378,9 +406,11 @@  int init_db(const char *git_dir, const char *real_git_dir,
 	 * config file, so this will not fail.  What we are catching
 	 * is an attempt to reinitialize new repository with an old tool.
 	 */
-	check_repository_format(NULL);
+	check_repository_format(&repo_fmt);
 
-	reinit = create_default_files(template_dir, original_git_dir);
+	validate_hash_algorithm(&repo_fmt, hash);
+
+	reinit = create_default_files(template_dir, original_git_dir, &repo_fmt);
 
 	create_object_directory();
 
@@ -482,6 +512,8 @@  int cmd_init_db(int argc, const char **argv, const char *prefix)
 	const char *work_tree;
 	const char *template_dir = NULL;
 	unsigned int flags = 0;
+	const char *object_format = NULL;
+	int hash_algo = GIT_HASH_UNKNOWN;
 	const struct option init_db_options[] = {
 		OPT_STRING(0, "template", &template_dir, N_("template-directory"),
 				N_("directory from which templates will be used")),
@@ -494,6 +526,8 @@  int cmd_init_db(int argc, const char **argv, const char *prefix)
 		OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET),
 		OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
 			   N_("separate git dir from working tree")),
+		OPT_STRING(0, "object-format", &object_format, N_("hash"),
+			   N_("specify the hash algorithm to use")),
 		OPT_END()
 	};
 
@@ -546,6 +580,12 @@  int cmd_init_db(int argc, const char **argv, const char *prefix)
 		free(cwd);
 	}
 
+	if (object_format) {
+		hash_algo = hash_algo_by_name(object_format);
+		if (hash_algo == GIT_HASH_UNKNOWN)
+			die(_("unknown hash algorithm '%s'"), object_format);
+	}
+
 	if (init_shared_repository != -1)
 		set_shared_repository(init_shared_repository);
 
@@ -597,5 +637,5 @@  int cmd_init_db(int argc, const char **argv, const char *prefix)
 	UNLEAK(work_tree);
 
 	flags |= INIT_DB_EXIST_OK;
-	return init_db(git_dir, real_git_dir, template_dir, flags);
+	return init_db(git_dir, real_git_dir, template_dir, hash_algo, flags);
 }
diff --git a/cache.h b/cache.h
index 29ee02a8d4..7a47e023ba 100644
--- a/cache.h
+++ b/cache.h
@@ -627,7 +627,8 @@  int path_inside_repo(const char *prefix, const char *path);
 #define INIT_DB_EXIST_OK 0x0002
 
 int init_db(const char *git_dir, const char *real_git_dir,
-	    const char *template_dir, unsigned int flags);
+	    const char *template_dir, int hash_algo,
+	    unsigned int flags);
 
 void sanitize_stdfds(void);
 int daemonize(void);