diff mbox series

[GSoC,v2,1/2] submodule--helper: introduce add-clone subcommand

Message ID 20210608095655.47324-2-raykar.ath@gmail.com (mailing list archive)
State Superseded
Headers show
Series submodule--helper: introduce subcommands for sh to C conversion | expand

Commit Message

Atharva Raykar June 8, 2021, 9:56 a.m. UTC
Let's add a new "add-clone" subcommand to `git submodule--helper` with
the goal of converting part of the shell code in git-submodule.sh
related to `git submodule add` into C code. This new subcommand clones
the repository that is to be added, and checks out to the appropriate
branch.

This is meant to be a faithful conversion that leaves the behaviour of
'submodule add' unchanged. The only minor change is that if a submodule name has
been supplied with a name that clashes with a local submodule, the message shown
to the user ("A git directory for 'foo' is found locally...") is prepended with
"error" for clarity.

This is part of a series of changes that will result in all of 'submodule add'
being converted to C.

Signed-off-by: Atharva Raykar <raykar.ath@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Shourya Shukla <shouryashukla.oo@gmail.com>
Based-on-patch-by: Shourya Shukla <shouryashukla.oo@gmail.com>
Based-on-patch-by: Prathamesh Chavan <pc44800@gmail.com>
---
 builtin/submodule--helper.c | 199 ++++++++++++++++++++++++++++++++++++
 git-submodule.sh            |  38 +------
 2 files changed, 200 insertions(+), 37 deletions(-)

Comments

Đoàn Trần Công Danh June 8, 2021, 12:32 p.m. UTC | #1
On 2021-06-08 15:26:54+0530, Atharva Raykar <raykar.ath@gmail.com> wrote:
> Let's add a new "add-clone" subcommand to `git submodule--helper` with
> the goal of converting part of the shell code in git-submodule.sh
> related to `git submodule add` into C code. This new subcommand clones
> the repository that is to be added, and checks out to the appropriate
> branch.
> 
> This is meant to be a faithful conversion that leaves the behaviour of
> 'submodule add' unchanged. The only minor change is that if a submodule name has
> been supplied with a name that clashes with a local submodule, the message shown
> to the user ("A git directory for 'foo' is found locally...") is prepended with
> "error" for clarity.
> 
> This is part of a series of changes that will result in all of 'submodule add'
> being converted to C.
> 
> Signed-off-by: Atharva Raykar <raykar.ath@gmail.com>
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: Shourya Shukla <shouryashukla.oo@gmail.com>
> Based-on-patch-by: Shourya Shukla <shouryashukla.oo@gmail.com>
> Based-on-patch-by: Prathamesh Chavan <pc44800@gmail.com>
> ---
>  builtin/submodule--helper.c | 199 ++++++++++++++++++++++++++++++++++++
>  git-submodule.sh            |  38 +------
>  2 files changed, 200 insertions(+), 37 deletions(-)
> 
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index d55f6262e9..c9cb535312 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2745,6 +2745,204 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
>  	return !!ret;
>  }
>  
> +struct add_data {
> +	const char *prefix;
> +	const char *branch;
> +	const char *reference_path;
> +	const char *sm_path;
> +	const char *sm_name;
> +	const char *repo;
> +	const char *realrepo;
> +	int depth;
> +	unsigned int force: 1;
> +	unsigned int quiet: 1;
> +	unsigned int progress: 1;
> +	unsigned int dissociate: 1;
> +};
> +#define ADD_DATA_INIT { .depth = -1 }
> +
> +static char *parse_token(char **begin, const char *end, int *tok_len)
> +{
> +	char *tok_start, *pos = *begin;
> +	while (pos != end && (*pos != ' ' && *pos != '\t' && *pos != '\n'))
> +		pos++;
> +	tok_start = *begin;
> +	*tok_len = pos - *begin;
> +	*begin = pos + 1;
> +	return tok_start;
> +}
> +
> +static char *get_next_line(char *const begin, const char *const end)
> +{
> +	char *pos = begin;
> +	while (pos != end && *pos++ != '\n');
> +	return pos;
> +}

On my first glance, this function looks like a reinvention of strchr(3).
Except that, this function also has a second parameter for "end".
Maybe it has a specical use-case?

> +
> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
> +{
> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
> +	struct strbuf sb_remote_out = STRBUF_INIT;
> +
> +	cp_remote.git_cmd = 1;
> +	strvec_pushf(&cp_remote.env_array,
> +		     "GIT_DIR=%s", git_dir_path);
> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
> +		char *line;
> +		char *begin = sb_remote_out.buf;
> +		char *end = sb_remote_out.buf + sb_remote_out.len;
> +		while (begin != end && (line = get_next_line(begin, end))) {

And this is the only use-case.  Because you also want to check if you
reached the last token or not.  I guess you really meant to write:

	while ((line = strchr(begin, '\n')) != NULL) {

Anyway, I would name the "line" variable as "nextline"

> +			int namelen = 0, urllen = 0, taillen = 0;
> +			char *name = parse_token(&begin, line, &namelen);
> +			char *url = parse_token(&begin, line, &urllen);
> +			char *tail = parse_token(&begin, line, &taillen);
> +			if (!memcmp(tail, "(fetch)", 7))
> +				fprintf(output, "  %.*s\t%.*s\n",
> +					namelen, name, urllen, url);

I think this whole block is better replaced with strip_suffix_mem and
fprintf.

Overral I would replace the block inside capture_command with:

-----8<-----
	char *nextline;
	char *line = sb_remote_out.buf;
	while ((nextline = strchr(line, '\n')) != NULL) {
		size_t len = nextline - line;
		if (strip_suffix_mem(line, &len, "(fetch)"))
			fprintf(output, "  %.*s\n", (int)len, line);
		line = nextline + 1;
	}
---->8-----

And get rid of parse_token and get_next_line functions.



> +		}
> +	}
> +
> +	strbuf_release(&sb_remote_out);
> +}
> +
> +static int add_submodule(const struct add_data *add_data)
> +{
> +	char *submod_gitdir_path;
> +	/* perhaps the path already exists and is already a git repo, else clone it */
> +	if (is_directory(add_data->sm_path)) {
> +		submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
> +		if (is_directory(submod_gitdir_path) || file_exists(submod_gitdir_path))
> +			printf(_("Adding existing path at '%s' to index\n"),
> +			       add_data->sm_path);
> +		else
> +			die(_("'%s' already exists and is not a valid git repo"),
> +			    add_data->sm_path);
> +		free(submod_gitdir_path);
> +	} else {
> +		struct strvec clone_args = STRVEC_INIT;
> +		struct child_process cp = CHILD_PROCESS_INIT;
> +		submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
> +
> +		if (is_directory(submod_gitdir_path)) {
> +			if (!add_data->force) {
> +				error(_("A git directory for '%s' is found "
> +					"locally with remote(s):"), add_data->sm_name);

We don't capitalise first character of error message.
IOW, downcase "A git".

Well, it's bug-for-bug with shell implementation, so it doesn't matter much, anyway.

> +				show_fetch_remotes(stderr, add_data->sm_name,
> +						   submod_gitdir_path);
> +				fprintf(stderr,
> +					_("If you want to reuse this local git "
> +					  "directory instead of cloning again from\n"
> +					  "  %s\n"
> +					  "use the '--force' option. If the local git "
> +					  "directory is not the correct repo\n"
> +					  "or if you are unsure what this means, choose "
> +					  "another name with the '--name' option.\n"),
> +					add_data->realrepo);

Is there any reason we can't use "error" here?

> +				free(submod_gitdir_path);
> +				return 1;
> +			} else {
> +				printf(_("Reactivating local git directory for "
> +					 "submodule '%s'\n"), add_data->sm_name);
> +			}
> +		}
> +		free(submod_gitdir_path);
> +
> +		strvec_pushl(&clone_args, "clone", "--path", add_data->sm_path, "--name",
> +			     add_data->sm_name, "--url", add_data->realrepo, NULL);
> +		if (add_data->quiet)
> +			strvec_push(&clone_args, "--quiet");
> +		if (add_data->progress)
> +			strvec_push(&clone_args, "--progress");
> +		if (add_data->prefix)
> +			strvec_pushl(&clone_args, "--prefix", add_data->prefix, NULL);
> +		if (add_data->reference_path)
> +			strvec_pushl(&clone_args, "--reference",
> +				     add_data->reference_path, NULL);
> +		if (add_data->dissociate)
> +			strvec_push(&clone_args, "--dissociate");
> +		if (add_data->depth >= 0)
> +			strvec_pushf(&clone_args, "--depth=%d", add_data->depth);
> +
> +		if (module_clone(clone_args.nr, clone_args.v, add_data->prefix)) {
> +			strvec_clear(&clone_args);
> +			return -1;
> +		}
> +		strvec_clear(&clone_args);
> +
> +		prepare_submodule_repo_env(&cp.env_array);
> +		cp.git_cmd = 1;
> +		cp.dir = add_data->sm_path;
> +		strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL);
> +
> +		if (add_data->branch) {
> +			strvec_pushl(&cp.args, "-B", add_data->branch, NULL);
> +			strvec_pushf(&cp.args, "origin/%s", add_data->branch);
> +		}
> +
> +		if (run_command(&cp))
> +			die(_("unable to checkout submodule '%s'"), add_data->sm_path);
> +	}
> +	return 0;
> +}
> +
> +static int add_clone(int argc, const char **argv, const char *prefix)
> +{
> +	int force = 0, quiet = 0, dissociate = 0, progress = 0;
> +	struct add_data add_data = ADD_DATA_INIT;
> +
> +	struct option options[] = {
> +		OPT_STRING('b', "branch", &add_data.branch,
> +			   N_("branch"),
> +			   N_("branch of repository to checkout on cloning")),
> +		OPT_STRING(0, "prefix", &add_data.prefix,
> +			   N_("path"),
> +			   N_("alternative anchor for relative paths")),
> +		OPT_STRING(0, "path", &add_data.sm_path,
> +			   N_("path"),
> +			   N_("where the new submodule will be cloned to")),
> +		OPT_STRING(0, "name", &add_data.sm_name,
> +			   N_("string"),
> +			   N_("name of the new submodule")),
> +		OPT_STRING(0, "url", &add_data.realrepo,
> +			   N_("string"),
> +			   N_("url where to clone the submodule from")),
> +		OPT_STRING(0, "reference", &add_data.reference_path,
> +			   N_("repo"),
> +			   N_("reference repository")),
> +		OPT_BOOL(0, "dissociate", &dissociate,
> +			 N_("use --reference only while cloning")),
> +		OPT_INTEGER(0, "depth", &add_data.depth,
> +			    N_("depth for shallow clones")),
> +		OPT_BOOL(0, "progress", &progress,
> +			 N_("force cloning progress")),
> +		OPT_BOOL('f', "force", &force,
> +			 N_("allow adding an otherwise ignored submodule path")),

We have OPT__FORCE, too.

> +		OPT__QUIET(&quiet, "Suppress output for cloning a submodule"),

And, please downcase "Suppress".

> +		OPT_END()
> +	};
> +
> +	const char *const usage[] = {
> +		N_("git submodule--helper add-clone [--prefix=<path>] [--quiet] [--force] "
> +		   "[--reference <repository>] [--depth <depth>] [-b|--branch <branch>]"
> +		   "[--progress] [--dissociate] --url <url> --path <path> --name <name>"),

I think it's too crowded here, I guess it's better to write:

	N_("git submodule--helper add-clone [<options>...] --url <url> --path <path> --name <name>"),

> +		NULL
> +	};
> +
> +	argc = parse_options(argc, argv, prefix, options, usage, 0);

From above usage, I think url, path, name is required, should we have a check for them, here?
> +
> +	add_data.progress = !!progress;
> +	add_data.dissociate = !!dissociate;
> +	add_data.force = !!force;
> +	add_data.quiet = !!quiet;
> +
> +	if (add_submodule(&add_data))
> +		return 1;
> +
> +	return 0;
> +}
> +
>  #define SUPPORT_SUPER_PREFIX (1<<0)
>  
>  struct cmd_struct {
> @@ -2757,6 +2955,7 @@ static struct cmd_struct commands[] = {
>  	{"list", module_list, 0},
>  	{"name", module_name, 0},
>  	{"clone", module_clone, 0},
> +	{"add-clone", add_clone, 0},
>  	{"update-module-mode", module_update_module_mode, 0},
>  	{"update-clone", update_clone, 0},
>  	{"ensure-core-worktree", ensure_core_worktree, 0},
> diff --git a/git-submodule.sh b/git-submodule.sh
> index 4678378424..f71e1e5495 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -241,43 +241,7 @@ cmd_add()
>  		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
>  	fi
>  
> -	# perhaps the path exists and is already a git repo, else clone it
> -	if test -e "$sm_path"
> -	then
> -		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
> -		then
> -			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
> -		else
> -			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
> -		fi
> -
> -	else
> -		if test -d ".git/modules/$sm_name"
> -		then
> -			if test -z "$force"
> -			then
> -				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
> -				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
> -				die "$(eval_gettextln "\
> -If you want to reuse this local git directory instead of cloning again from
> -  \$realrepo
> -use the '--force' option. If the local git directory is not the correct repo
> -or you are unsure what this means choose another name with the '--name' option.")"
> -			else
> -				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
> -			fi
> -		fi
> -		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
> -		(
> -			sanitize_submodule_env
> -			cd "$sm_path" &&
> -			# ash fails to wordsplit ${branch:+-b "$branch"...}
> -			case "$branch" in
> -			'') git checkout -f -q ;;
> -			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
> -			esac
> -		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
> -	fi
> +	git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
>  	git config submodule."$sm_name".url "$realrepo"
>  
>  	git add --no-warn-embedded-repo $force "$sm_path" ||
> -- 
> 2.31.1
>
Junio C Hamano June 9, 2021, 4:24 a.m. UTC | #2
Just a bit of random comments, leaving the full review to mentors.

> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index d55f6262e9..c9cb535312 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2745,6 +2745,204 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
>  	return !!ret;
>  }
>  
> +struct add_data {
> +	const char *prefix;
> +	const char *branch;
> +	const char *reference_path;
> +	const char *sm_path;
> +	const char *sm_name;
> +	const char *repo;
> +	const char *realrepo;
> +	int depth;
> +	unsigned int force: 1;
> +	unsigned int quiet: 1;
> +	unsigned int progress: 1;
> +	unsigned int dissociate: 1;
> +};
> +#define ADD_DATA_INIT { .depth = -1 }
> +
> +static char *parse_token(char **begin, const char *end, int *tok_len)
> +{
> +	char *tok_start, *pos = *begin;

Make it a habit to have a blank line between the initial block
of declarations and the first statement.

> +	while (pos != end && (*pos != ' ' && *pos != '\t' && *pos != '\n'))
> +		pos++;
> +	tok_start = *begin;
> +	*tok_len = pos - *begin;
> +	*begin = pos + 1;
> +	return tok_start;
> +}
> +static char *get_next_line(char *const begin, const char *const end)
> +{
> +	char *pos = begin;
> +	while (pos != end && *pos++ != '\n');

Write an empty loop on two lines, like this:

	while (... condition ...)
		; /* keep scanning */

If there is a NUL byte between begin and end, this keeps going and
the resulting string will contain one.  Is that a problem?

> +	return pos;
> +}

In general, this project is mature enough that we should question
ourselves if there is already a suitable line parser we can reuse
when tempted to write another one.

> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
> +{
> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
> +	struct strbuf sb_remote_out = STRBUF_INIT;
> +
> +	cp_remote.git_cmd = 1;
> +	strvec_pushf(&cp_remote.env_array,
> +		     "GIT_DIR=%s", git_dir_path);
> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
> +		char *line;
> +		char *begin = sb_remote_out.buf;
> +		char *end = sb_remote_out.buf + sb_remote_out.len;
> +		while (begin != end && (line = get_next_line(begin, end))) {

OK, so this tries to parse output from "git remote -v", so NUL will
not be an issue at all.  We will get a string that is NUL terminated
and has zero or more lines, terminated with LFs.

If that is the case, I think it is far easier to read without
a custom get-next-line wrapper, e.g.

	for (this_line = begin;
	     *this_line;
	     this_line = next_line) {
		next_line = strchrnul(this_line, '\n');
		... process bytes between this_line..next_line ...
	}                

> +			int namelen = 0, urllen = 0, taillen = 0;
> +			char *name = parse_token(&begin, line, &namelen);

Similarly, consider if strcspn() is useful in implementing
parse_token().  See how existing code uses the standard system
function with

	$ git grep strcspn \*.c

> +			char *url = parse_token(&begin, line, &urllen);
> +			char *tail = parse_token(&begin, line, &taillen);
> +			if (!memcmp(tail, "(fetch)", 7))

At this point do we know there are enough number of bytes after
tail[0] to allow us to do this comparison safely?  Otherwise,

			if (starts_with(tail, "(fetch)")

may be preferrable.
Atharva Raykar June 9, 2021, 10:31 a.m. UTC | #3
On 08-Jun-2021, at 18:02, Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
> 
>> [...]
>> +static char *get_next_line(char *const begin, const char *const end)
>> +{
>> +	char *pos = begin;
>> +	while (pos != end && *pos++ != '\n');
>> +	return pos;
>> +}
> 
> On my first glance, this function looks like a reinvention of strchr(3).
> Except that, this function also has a second parameter for "end".
> Maybe it has a specical use-case?
> 
>> +
>> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
>> +{
>> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
>> +	struct strbuf sb_remote_out = STRBUF_INIT;
>> +
>> +	cp_remote.git_cmd = 1;
>> +	strvec_pushf(&cp_remote.env_array,
>> +		     "GIT_DIR=%s", git_dir_path);
>> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
>> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
>> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
>> +		char *line;
>> +		char *begin = sb_remote_out.buf;
>> +		char *end = sb_remote_out.buf + sb_remote_out.len;
>> +		while (begin != end && (line = get_next_line(begin, end))) {
> 
> And this is the only use-case.  Because you also want to check if you
> reached the last token or not.  I guess you really meant to write:
> 
> 	while ((line = strchr(begin, '\n')) != NULL) {
> 
> Anyway, I would name the "line" variable as "nextline"
> 
>> +			int namelen = 0, urllen = 0, taillen = 0;
>> +			char *name = parse_token(&begin, line, &namelen);
>> +			char *url = parse_token(&begin, line, &urllen);
>> +			char *tail = parse_token(&begin, line, &taillen);
>> +			if (!memcmp(tail, "(fetch)", 7))
>> +				fprintf(output, "  %.*s\t%.*s\n",
>> +					namelen, name, urllen, url);
> 
> I think this whole block is better replaced with strip_suffix_mem and
> fprintf.
> 
> Overral I would replace the block inside capture_command with:
> 
> -----8<-----
> 	char *nextline;
> 	char *line = sb_remote_out.buf;
> 	while ((nextline = strchr(line, '\n')) != NULL) {
> 		size_t len = nextline - line;
> 		if (strip_suffix_mem(line, &len, "(fetch)"))
> 			fprintf(output, "  %.*s\n", (int)len, line);
> 		line = nextline + 1;
> 	}
> ---->8-----
> 
> And get rid of parse_token and get_next_line functions.

That looks much simpler. Thanks!

I realised that all the token parsing I do is not really necessary.
What I really want to do is "If this line ends with '(fetch)',
print it, but without the '(fetch)'", and I think your version
captures that succinctly.

>> +		}
>> +	}
>> +
>> +	strbuf_release(&sb_remote_out);
>> +}
>> +
>> +static int add_submodule(const struct add_data *add_data)
>> +{
>> +	char *submod_gitdir_path;
>> +	/* perhaps the path already exists and is already a git repo, else clone it */
>> +	if (is_directory(add_data->sm_path)) {
>> +		submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
>> +		if (is_directory(submod_gitdir_path) || file_exists(submod_gitdir_path))
>> +			printf(_("Adding existing path at '%s' to index\n"),
>> +			       add_data->sm_path);
>> +		else
>> +			die(_("'%s' already exists and is not a valid git repo"),
>> +			    add_data->sm_path);
>> +		free(submod_gitdir_path);
>> +	} else {
>> +		struct strvec clone_args = STRVEC_INIT;
>> +		struct child_process cp = CHILD_PROCESS_INIT;
>> +		submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
>> +
>> +		if (is_directory(submod_gitdir_path)) {
>> +			if (!add_data->force) {
>> +				error(_("A git directory for '%s' is found "
>> +					"locally with remote(s):"), add_data->sm_name);
> 
> We don't capitalise first character of error message.
> IOW, downcase "A git".

Got it.

> Well, it's bug-for-bug with shell implementation, so it doesn't matter much, anyway.

While it is meant to be a faithful implementation, I think this
is a good opportunity to make minor style fixes.

>> +				show_fetch_remotes(stderr, add_data->sm_name,
>> +						   submod_gitdir_path);
>> +				fprintf(stderr,
>> +					_("If you want to reuse this local git "
>> +					  "directory instead of cloning again from\n"
>> +					  "  %s\n"
>> +					  "use the '--force' option. If the local git "
>> +					  "directory is not the correct repo\n"
>> +					  "or if you are unsure what this means, choose "
>> +					  "another name with the '--name' option.\n"),
>> +					add_data->realrepo);
> 
> Is there any reason we can't use "error" here?

The message in its entirety looks like this:

error: A git directory for 'test' is found locally with remote(s):
  origin	git@github.com:tfidfwastaken/abc.git
If you want to reuse this local git directory instead of cloning again from
  git@github.com:tfidfwastaken/test.git
use the '--force' option. If the local git directory is not the correct repo
or if you are unsure what this means, choose another name with the '--name' option.

Since the 'error:' is already there in the first line, having it
prepended before 'If you want to reuse...' felt redundant to me.

Besides, it's more of an informational message about what a user
can do next, rather than a message that signifies an error.

If there is a preferred convention or label for such messages,
I can use that. The shell version did not have any such thing though.

>> [...]
>> +	struct option options[] = {
>> +		OPT_STRING('b', "branch", &add_data.branch,
>> +			   N_("branch"),
>> +			   N_("branch of repository to checkout on cloning")),
>> +		OPT_STRING(0, "prefix", &add_data.prefix,
>> +			   N_("path"),
>> +			   N_("alternative anchor for relative paths")),
>> +		OPT_STRING(0, "path", &add_data.sm_path,
>> +			   N_("path"),
>> +			   N_("where the new submodule will be cloned to")),
>> +		OPT_STRING(0, "name", &add_data.sm_name,
>> +			   N_("string"),
>> +			   N_("name of the new submodule")),
>> +		OPT_STRING(0, "url", &add_data.realrepo,
>> +			   N_("string"),
>> +			   N_("url where to clone the submodule from")),
>> +		OPT_STRING(0, "reference", &add_data.reference_path,
>> +			   N_("repo"),
>> +			   N_("reference repository")),
>> +		OPT_BOOL(0, "dissociate", &dissociate,
>> +			 N_("use --reference only while cloning")),
>> +		OPT_INTEGER(0, "depth", &add_data.depth,
>> +			    N_("depth for shallow clones")),
>> +		OPT_BOOL(0, "progress", &progress,
>> +			 N_("force cloning progress")),
>> +		OPT_BOOL('f', "force", &force,
>> +			 N_("allow adding an otherwise ignored submodule path")),
> 
> We have OPT__FORCE, too.

Will switch over to that.

>> +		OPT__QUIET(&quiet, "Suppress output for cloning a submodule"),
> 
> And, please downcase "Suppress".

OK.

>> +		OPT_END()
>> +	};
>> +
>> +	const char *const usage[] = {
>> +		N_("git submodule--helper add-clone [--prefix=<path>] [--quiet] [--force] "
>> +		   "[--reference <repository>] [--depth <depth>] [-b|--branch <branch>]"
>> +		   "[--progress] [--dissociate] --url <url> --path <path> --name <name>"),
> 
> I think it's too crowded here, I guess it's better to write:
> 
> 	N_("git submodule--helper add-clone [<options>...] --url <url> --path <path> --name <name>"),

OK. It shouldn't be an issue to shorten it, because this is not
user-facing, and is only ever used within 'cmd_add()'.

>> +		NULL
>> +	};
>> +
>> +	argc = parse_options(argc, argv, prefix, options, usage, 0);
> 
> From above usage, I think url, path, name is required, should we have a check for them, here?

We could. The reason why I was not too rigorous about this is
because I plan to eliminate the shell interface for this helper
eventually and call add-clone from within C, in the next few
patches.

But this is a small ask, and I can just add a quick check just
to be extra safe, so I'll do it.

>> +
>> +	add_data.progress = !!progress;
>> +	add_data.dissociate = !!dissociate;
>> +	add_data.force = !!force;
>> +	add_data.quiet = !!quiet;
>> +
>> +	if (add_submodule(&add_data))
>> +		return 1;
>> +
>> +	return 0;
>> +}
>> +
>> #define SUPPORT_SUPER_PREFIX (1<<0)
>> 
>> struct cmd_struct {
>> @@ -2757,6 +2955,7 @@ static struct cmd_struct commands[] = {
>> 	{"list", module_list, 0},
>> 	{"name", module_name, 0},
>> 	{"clone", module_clone, 0},
>> +	{"add-clone", add_clone, 0},
>> 	{"update-module-mode", module_update_module_mode, 0},
>> 	{"update-clone", update_clone, 0},
>> 	{"ensure-core-worktree", ensure_core_worktree, 0},
>> diff --git a/git-submodule.sh b/git-submodule.sh
>> index 4678378424..f71e1e5495 100755
>> --- a/git-submodule.sh
>> +++ b/git-submodule.sh
>> @@ -241,43 +241,7 @@ cmd_add()
>> 		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
>> 	fi
>> 
>> -	# perhaps the path exists and is already a git repo, else clone it
>> -	if test -e "$sm_path"
>> -	then
>> -		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
>> -		then
>> -			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
>> -		else
>> -			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
>> -		fi
>> -
>> -	else
>> -		if test -d ".git/modules/$sm_name"
>> -		then
>> -			if test -z "$force"
>> -			then
>> -				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
>> -				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
>> -				die "$(eval_gettextln "\
>> -If you want to reuse this local git directory instead of cloning again from
>> -  \$realrepo
>> -use the '--force' option. If the local git directory is not the correct repo
>> -or you are unsure what this means choose another name with the '--name' option.")"
>> -			else
>> -				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
>> -			fi
>> -		fi
>> -		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
>> -		(
>> -			sanitize_submodule_env
>> -			cd "$sm_path" &&
>> -			# ash fails to wordsplit ${branch:+-b "$branch"...}
>> -			case "$branch" in
>> -			'') git checkout -f -q ;;
>> -			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
>> -			esac
>> -		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
>> -	fi
>> +	git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
>> 	git config submodule."$sm_name".url "$realrepo"
>> 
>> 	git add --no-warn-embedded-repo $force "$sm_path" ||
>> -- 
>> 2.31.1
>> 
> 
> -- 
> Danh

Thanks for the comments!
Atharva Raykar June 9, 2021, 10:31 a.m. UTC | #4
On 09-Jun-2021, at 09:54, Junio C Hamano <gitster@pobox.com> wrote:
> 
> Just a bit of random comments, leaving the full review to mentors.
> 
>> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
>> index d55f6262e9..c9cb535312 100644
>> --- a/builtin/submodule--helper.c
>> +++ b/builtin/submodule--helper.c
>> @@ -2745,6 +2745,204 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
>> 	return !!ret;
>> }
>> 
>> +struct add_data {
>> +	const char *prefix;
>> +	const char *branch;
>> +	const char *reference_path;
>> +	const char *sm_path;
>> +	const char *sm_name;
>> +	const char *repo;
>> +	const char *realrepo;
>> +	int depth;
>> +	unsigned int force: 1;
>> +	unsigned int quiet: 1;
>> +	unsigned int progress: 1;
>> +	unsigned int dissociate: 1;
>> +};
>> +#define ADD_DATA_INIT { .depth = -1 }
>> +
>> +static char *parse_token(char **begin, const char *end, int *tok_len)
>> +{
>> +	char *tok_start, *pos = *begin;
> 
> Make it a habit to have a blank line between the initial block
> of declarations and the first statement.
> 
>> +	while (pos != end && (*pos != ' ' && *pos != '\t' && *pos != '\n'))
>> +		pos++;
>> +	tok_start = *begin;
>> +	*tok_len = pos - *begin;
>> +	*begin = pos + 1;
>> +	return tok_start;
>> +}
>> +static char *get_next_line(char *const begin, const char *const end)
>> +{
>> +	char *pos = begin;
>> +	while (pos != end && *pos++ != '\n');
> 
> Write an empty loop on two lines, like this:
> 
> 	while (... condition ...)
> 		; /* keep scanning */

OK.

> If there is a NUL byte between begin and end, this keeps going and
> the resulting string will contain one.  Is that a problem?
> 
>> +	return pos;
>> +}
> 
> In general, this project is mature enough that we should question
> ourselves if there is already a suitable line parser we can reuse
> when tempted to write another one.

I will keep this in mind.

>> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
>> +{
>> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
>> +	struct strbuf sb_remote_out = STRBUF_INIT;
>> +
>> +	cp_remote.git_cmd = 1;
>> +	strvec_pushf(&cp_remote.env_array,
>> +		     "GIT_DIR=%s", git_dir_path);
>> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
>> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
>> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
>> +		char *line;
>> +		char *begin = sb_remote_out.buf;
>> +		char *end = sb_remote_out.buf + sb_remote_out.len;
>> +		while (begin != end && (line = get_next_line(begin, end))) {
> 
> OK, so this tries to parse output from "git remote -v", so NUL will
> not be an issue at all.  We will get a string that is NUL terminated
> and has zero or more lines, terminated with LFs.
> 
> If that is the case, I think it is far easier to read without
> a custom get-next-line wrapper, e.g.
> 
> 	for (this_line = begin;
> 	     *this_line;
> 	     this_line = next_line) {
> 		next_line = strchrnul(this_line, '\n');
> 		... process bytes between this_line..next_line ...
> 	}                
> 
>> +			int namelen = 0, urllen = 0, taillen = 0;
>> +			char *name = parse_token(&begin, line, &namelen);
> 
> Similarly, consider if strcspn() is useful in implementing
> parse_token().  See how existing code uses the standard system
> function with
> 
> 	$ git grep strcspn \*.c
> 
>> +			char *url = parse_token(&begin, line, &urllen);
>> +			char *tail = parse_token(&begin, line, &taillen);
>> +			if (!memcmp(tail, "(fetch)", 7))
> 
> At this point do we know there are enough number of bytes after
> tail[0] to allow us to do this comparison safely?  Otherwise,
> 
> 			if (starts_with(tail, "(fetch)")
> 
> may be preferrable.

This solution is definitely an improvement over what I was doing.

That said, I like Danh's suggestion[1] more, because it eliminates the
need for parsing tokens entirely.

The fundamental thing that piece of code was meant to do is:

"If this line ends with '(fetch)', print the line, but without the '(fetch)'"

Parsing tokens only to put them back together through fprintf() may
not be necessary for this usage, so using strchr() with
strip_suffix_mem() should do the trick.

[1] https://lore.kernel.org/git/YL9jTFAoEBP+mDA2@danh.dev/

Thanks for the comments :^)
Đoàn Trần Công Danh June 9, 2021, 1:06 p.m. UTC | #5
On 2021-06-09 16:01:40+0530, Atharva Raykar <raykar.ath@gmail.com> wrote:
> On 08-Jun-2021, at 18:02, Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
> > 
> >> [...]
> >> +static char *get_next_line(char *const begin, const char *const end)
> >> +{
> >> +	char *pos = begin;
> >> +	while (pos != end && *pos++ != '\n');
> >> +	return pos;
> >> +}
> > 
> > On my first glance, this function looks like a reinvention of strchr(3).
> > Except that, this function also has a second parameter for "end".
> > Maybe it has a specical use-case?
> > 
> >> +
> >> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
> >> +{
> >> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
> >> +	struct strbuf sb_remote_out = STRBUF_INIT;
> >> +
> >> +	cp_remote.git_cmd = 1;
> >> +	strvec_pushf(&cp_remote.env_array,
> >> +		     "GIT_DIR=%s", git_dir_path);
> >> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
> >> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
> >> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
> >> +		char *line;
> >> +		char *begin = sb_remote_out.buf;
> >> +		char *end = sb_remote_out.buf + sb_remote_out.len;
> >> +		while (begin != end && (line = get_next_line(begin, end))) {
> > 
> > And this is the only use-case.  Because you also want to check if you
> > reached the last token or not.  I guess you really meant to write:
> > 
> > 	while ((line = strchr(begin, '\n')) != NULL) {
> > 
> > Anyway, I would name the "line" variable as "nextline"
> > 
> >> +			int namelen = 0, urllen = 0, taillen = 0;
> >> +			char *name = parse_token(&begin, line, &namelen);
> >> +			char *url = parse_token(&begin, line, &urllen);
> >> +			char *tail = parse_token(&begin, line, &taillen);
> >> +			if (!memcmp(tail, "(fetch)", 7))
> >> +				fprintf(output, "  %.*s\t%.*s\n",
> >> +					namelen, name, urllen, url);
> > 
> > I think this whole block is better replaced with strip_suffix_mem and
> > fprintf.
> > 
> > Overral I would replace the block inside capture_command with:
> > 
> > -----8<-----
> > 	char *nextline;
> > 	char *line = sb_remote_out.buf;
> > 	while ((nextline = strchr(line, '\n')) != NULL) {
> > 		size_t len = nextline - line;
> > 		if (strip_suffix_mem(line, &len, "(fetch)"))
> > 			fprintf(output, "  %.*s\n", (int)len, line);

Fix-up for my suggestion:

To be bug-for-bug with shell implementation, it should be:

		if (strip_suffix_mem(line, &len, " (fetch)"))

> > 		line = nextline + 1;
> > 	}
> > ---->8-----
> > 
> > And get rid of parse_token and get_next_line functions.
> 
> That looks much simpler. Thanks!
> 
> I realised that all the token parsing I do is not really necessary.
> What I really want to do is "If this line ends with '(fetch)',
> print it, but without the '(fetch)'", and I think your version
> captures that succinctly.
> 
> >> +		}
> >> +	}
> >> +
> >> +	strbuf_release(&sb_remote_out);
> >> +}
> >> +
> >> +static int add_submodule(const struct add_data *add_data)
> >> +{
> >> +	char *submod_gitdir_path;
> >> +	/* perhaps the path already exists and is already a git repo, else clone it */
> >> +	if (is_directory(add_data->sm_path)) {
> >> +		submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
> >> +		if (is_directory(submod_gitdir_path) || file_exists(submod_gitdir_path))
> >> +			printf(_("Adding existing path at '%s' to index\n"),
> >> +			       add_data->sm_path);
> >> +		else
> >> +			die(_("'%s' already exists and is not a valid git repo"),
> >> +			    add_data->sm_path);
> >> +		free(submod_gitdir_path);
> >> +	} else {
> >> +		struct strvec clone_args = STRVEC_INIT;
> >> +		struct child_process cp = CHILD_PROCESS_INIT;
> >> +		submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
> >> +
> >> +		if (is_directory(submod_gitdir_path)) {
> >> +			if (!add_data->force) {
> >> +				error(_("A git directory for '%s' is found "
> >> +					"locally with remote(s):"), add_data->sm_name);
> > 
> > We don't capitalise first character of error message.
> > IOW, downcase "A git".
> 
> Got it.
> 
> > Well, it's bug-for-bug with shell implementation, so it doesn't matter much, anyway.
> 
> While it is meant to be a faithful implementation, I think this
> is a good opportunity to make minor style fixes.
> 
> >> +				show_fetch_remotes(stderr, add_data->sm_name,
> >> +						   submod_gitdir_path);
> >> +				fprintf(stderr,
> >> +					_("If you want to reuse this local git "
> >> +					  "directory instead of cloning again from\n"
> >> +					  "  %s\n"
> >> +					  "use the '--force' option. If the local git "
> >> +					  "directory is not the correct repo\n"
> >> +					  "or if you are unsure what this means, choose "
> >> +					  "another name with the '--name' option.\n"),
> >> +					add_data->realrepo);
> > 
> > Is there any reason we can't use "error" here?
> 
> The message in its entirety looks like this:
> 
> error: A git directory for 'test' is found locally with remote(s):
>   origin	git@github.com:tfidfwastaken/abc.git
> If you want to reuse this local git directory instead of cloning again from
>   git@github.com:tfidfwastaken/test.git
> use the '--force' option. If the local git directory is not the correct repo
> or if you are unsure what this means, choose another name with the '--name' option.
> 
> Since the 'error:' is already there in the first line, having it
> prepended before 'If you want to reuse...' felt redundant to me.
> 
> Besides, it's more of an informational message about what a user
> can do next, rather than a message that signifies an error.
> 
> If there is a preferred convention or label for such messages,
> I can use that. The shell version did not have any such thing though.
> 
> >> [...]
> >> +	struct option options[] = {
> >> +		OPT_STRING('b', "branch", &add_data.branch,
> >> +			   N_("branch"),
> >> +			   N_("branch of repository to checkout on cloning")),
> >> +		OPT_STRING(0, "prefix", &add_data.prefix,
> >> +			   N_("path"),
> >> +			   N_("alternative anchor for relative paths")),
> >> +		OPT_STRING(0, "path", &add_data.sm_path,
> >> +			   N_("path"),
> >> +			   N_("where the new submodule will be cloned to")),
> >> +		OPT_STRING(0, "name", &add_data.sm_name,
> >> +			   N_("string"),
> >> +			   N_("name of the new submodule")),
> >> +		OPT_STRING(0, "url", &add_data.realrepo,
> >> +			   N_("string"),
> >> +			   N_("url where to clone the submodule from")),
> >> +		OPT_STRING(0, "reference", &add_data.reference_path,
> >> +			   N_("repo"),
> >> +			   N_("reference repository")),
> >> +		OPT_BOOL(0, "dissociate", &dissociate,
> >> +			 N_("use --reference only while cloning")),
> >> +		OPT_INTEGER(0, "depth", &add_data.depth,
> >> +			    N_("depth for shallow clones")),
> >> +		OPT_BOOL(0, "progress", &progress,
> >> +			 N_("force cloning progress")),
> >> +		OPT_BOOL('f', "force", &force,
> >> +			 N_("allow adding an otherwise ignored submodule path")),
> > 
> > We have OPT__FORCE, too.
> 
> Will switch over to that.
> 
> >> +		OPT__QUIET(&quiet, "Suppress output for cloning a submodule"),
> > 
> > And, please downcase "Suppress".
> 
> OK.
> 
> >> +		OPT_END()
> >> +	};
> >> +
> >> +	const char *const usage[] = {
> >> +		N_("git submodule--helper add-clone [--prefix=<path>] [--quiet] [--force] "
> >> +		   "[--reference <repository>] [--depth <depth>] [-b|--branch <branch>]"
> >> +		   "[--progress] [--dissociate] --url <url> --path <path> --name <name>"),
> > 
> > I think it's too crowded here, I guess it's better to write:
> > 
> > 	N_("git submodule--helper add-clone [<options>...] --url <url> --path <path> --name <name>"),
> 
> OK. It shouldn't be an issue to shorten it, because this is not
> user-facing, and is only ever used within 'cmd_add()'.
> 
> >> +		NULL
> >> +	};
> >> +
> >> +	argc = parse_options(argc, argv, prefix, options, usage, 0);
> > 
> > From above usage, I think url, path, name is required, should we have a check for them, here?
> 
> We could. The reason why I was not too rigorous about this is
> because I plan to eliminate the shell interface for this helper
> eventually and call add-clone from within C, in the next few
> patches.
> 
> But this is a small ask, and I can just add a quick check just
> to be extra safe, so I'll do it.
> 
> >> +
> >> +	add_data.progress = !!progress;
> >> +	add_data.dissociate = !!dissociate;
> >> +	add_data.force = !!force;
> >> +	add_data.quiet = !!quiet;
> >> +
> >> +	if (add_submodule(&add_data))
> >> +		return 1;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> #define SUPPORT_SUPER_PREFIX (1<<0)
> >> 
> >> struct cmd_struct {
> >> @@ -2757,6 +2955,7 @@ static struct cmd_struct commands[] = {
> >> 	{"list", module_list, 0},
> >> 	{"name", module_name, 0},
> >> 	{"clone", module_clone, 0},
> >> +	{"add-clone", add_clone, 0},
> >> 	{"update-module-mode", module_update_module_mode, 0},
> >> 	{"update-clone", update_clone, 0},
> >> 	{"ensure-core-worktree", ensure_core_worktree, 0},
> >> diff --git a/git-submodule.sh b/git-submodule.sh
> >> index 4678378424..f71e1e5495 100755
> >> --- a/git-submodule.sh
> >> +++ b/git-submodule.sh
> >> @@ -241,43 +241,7 @@ cmd_add()
> >> 		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
> >> 	fi
> >> 
> >> -	# perhaps the path exists and is already a git repo, else clone it
> >> -	if test -e "$sm_path"
> >> -	then
> >> -		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
> >> -		then
> >> -			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
> >> -		else
> >> -			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
> >> -		fi
> >> -
> >> -	else
> >> -		if test -d ".git/modules/$sm_name"
> >> -		then
> >> -			if test -z "$force"
> >> -			then
> >> -				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
> >> -				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
> >> -				die "$(eval_gettextln "\
> >> -If you want to reuse this local git directory instead of cloning again from
> >> -  \$realrepo
> >> -use the '--force' option. If the local git directory is not the correct repo
> >> -or you are unsure what this means choose another name with the '--name' option.")"
> >> -			else
> >> -				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
> >> -			fi
> >> -		fi
> >> -		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
> >> -		(
> >> -			sanitize_submodule_env
> >> -			cd "$sm_path" &&
> >> -			# ash fails to wordsplit ${branch:+-b "$branch"...}
> >> -			case "$branch" in
> >> -			'') git checkout -f -q ;;
> >> -			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
> >> -			esac
> >> -		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
> >> -	fi
> >> +	git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
> >> 	git config submodule."$sm_name".url "$realrepo"
> >> 
> >> 	git add --no-warn-embedded-repo $force "$sm_path" ||
> >> -- 
> >> 2.31.1
> >> 
> > 
> > -- 
> > Danh
> 
> Thanks for the comments!
Atharva Raykar June 9, 2021, 1:10 p.m. UTC | #6
On 09-Jun-2021, at 18:36, Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
>>> 
>>> Overral I would replace the block inside capture_command with:
>>> 
>>> -----8<-----
>>> 	char *nextline;
>>> 	char *line = sb_remote_out.buf;
>>> 	while ((nextline = strchr(line, '\n')) != NULL) {
>>> 		size_t len = nextline - line;
>>> 		if (strip_suffix_mem(line, &len, "(fetch)"))
>>> 			fprintf(output, "  %.*s\n", (int)len, line);
> 
> Fix-up for my suggestion:
> 
> To be bug-for-bug with shell implementation, it should be:
> 
> 		if (strip_suffix_mem(line, &len, " (fetch)"))

That is very subtle, and I would have definitely missed it.
Thanks.
diff mbox series

Patch

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index d55f6262e9..c9cb535312 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2745,6 +2745,204 @@  static int module_set_branch(int argc, const char **argv, const char *prefix)
 	return !!ret;
 }
 
+struct add_data {
+	const char *prefix;
+	const char *branch;
+	const char *reference_path;
+	const char *sm_path;
+	const char *sm_name;
+	const char *repo;
+	const char *realrepo;
+	int depth;
+	unsigned int force: 1;
+	unsigned int quiet: 1;
+	unsigned int progress: 1;
+	unsigned int dissociate: 1;
+};
+#define ADD_DATA_INIT { .depth = -1 }
+
+static char *parse_token(char **begin, const char *end, int *tok_len)
+{
+	char *tok_start, *pos = *begin;
+	while (pos != end && (*pos != ' ' && *pos != '\t' && *pos != '\n'))
+		pos++;
+	tok_start = *begin;
+	*tok_len = pos - *begin;
+	*begin = pos + 1;
+	return tok_start;
+}
+
+static char *get_next_line(char *const begin, const char *const end)
+{
+	char *pos = begin;
+	while (pos != end && *pos++ != '\n');
+	return pos;
+}
+
+static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
+{
+	struct child_process cp_remote = CHILD_PROCESS_INIT;
+	struct strbuf sb_remote_out = STRBUF_INIT;
+
+	cp_remote.git_cmd = 1;
+	strvec_pushf(&cp_remote.env_array,
+		     "GIT_DIR=%s", git_dir_path);
+	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
+	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
+	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
+		char *line;
+		char *begin = sb_remote_out.buf;
+		char *end = sb_remote_out.buf + sb_remote_out.len;
+		while (begin != end && (line = get_next_line(begin, end))) {
+			int namelen = 0, urllen = 0, taillen = 0;
+			char *name = parse_token(&begin, line, &namelen);
+			char *url = parse_token(&begin, line, &urllen);
+			char *tail = parse_token(&begin, line, &taillen);
+			if (!memcmp(tail, "(fetch)", 7))
+				fprintf(output, "  %.*s\t%.*s\n",
+					namelen, name, urllen, url);
+		}
+	}
+
+	strbuf_release(&sb_remote_out);
+}
+
+static int add_submodule(const struct add_data *add_data)
+{
+	char *submod_gitdir_path;
+	/* perhaps the path already exists and is already a git repo, else clone it */
+	if (is_directory(add_data->sm_path)) {
+		submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
+		if (is_directory(submod_gitdir_path) || file_exists(submod_gitdir_path))
+			printf(_("Adding existing path at '%s' to index\n"),
+			       add_data->sm_path);
+		else
+			die(_("'%s' already exists and is not a valid git repo"),
+			    add_data->sm_path);
+		free(submod_gitdir_path);
+	} else {
+		struct strvec clone_args = STRVEC_INIT;
+		struct child_process cp = CHILD_PROCESS_INIT;
+		submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
+
+		if (is_directory(submod_gitdir_path)) {
+			if (!add_data->force) {
+				error(_("A git directory for '%s' is found "
+					"locally with remote(s):"), add_data->sm_name);
+				show_fetch_remotes(stderr, add_data->sm_name,
+						   submod_gitdir_path);
+				fprintf(stderr,
+					_("If you want to reuse this local git "
+					  "directory instead of cloning again from\n"
+					  "  %s\n"
+					  "use the '--force' option. If the local git "
+					  "directory is not the correct repo\n"
+					  "or if you are unsure what this means, choose "
+					  "another name with the '--name' option.\n"),
+					add_data->realrepo);
+				free(submod_gitdir_path);
+				return 1;
+			} else {
+				printf(_("Reactivating local git directory for "
+					 "submodule '%s'\n"), add_data->sm_name);
+			}
+		}
+		free(submod_gitdir_path);
+
+		strvec_pushl(&clone_args, "clone", "--path", add_data->sm_path, "--name",
+			     add_data->sm_name, "--url", add_data->realrepo, NULL);
+		if (add_data->quiet)
+			strvec_push(&clone_args, "--quiet");
+		if (add_data->progress)
+			strvec_push(&clone_args, "--progress");
+		if (add_data->prefix)
+			strvec_pushl(&clone_args, "--prefix", add_data->prefix, NULL);
+		if (add_data->reference_path)
+			strvec_pushl(&clone_args, "--reference",
+				     add_data->reference_path, NULL);
+		if (add_data->dissociate)
+			strvec_push(&clone_args, "--dissociate");
+		if (add_data->depth >= 0)
+			strvec_pushf(&clone_args, "--depth=%d", add_data->depth);
+
+		if (module_clone(clone_args.nr, clone_args.v, add_data->prefix)) {
+			strvec_clear(&clone_args);
+			return -1;
+		}
+		strvec_clear(&clone_args);
+
+		prepare_submodule_repo_env(&cp.env_array);
+		cp.git_cmd = 1;
+		cp.dir = add_data->sm_path;
+		strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL);
+
+		if (add_data->branch) {
+			strvec_pushl(&cp.args, "-B", add_data->branch, NULL);
+			strvec_pushf(&cp.args, "origin/%s", add_data->branch);
+		}
+
+		if (run_command(&cp))
+			die(_("unable to checkout submodule '%s'"), add_data->sm_path);
+	}
+	return 0;
+}
+
+static int add_clone(int argc, const char **argv, const char *prefix)
+{
+	int force = 0, quiet = 0, dissociate = 0, progress = 0;
+	struct add_data add_data = ADD_DATA_INIT;
+
+	struct option options[] = {
+		OPT_STRING('b', "branch", &add_data.branch,
+			   N_("branch"),
+			   N_("branch of repository to checkout on cloning")),
+		OPT_STRING(0, "prefix", &add_data.prefix,
+			   N_("path"),
+			   N_("alternative anchor for relative paths")),
+		OPT_STRING(0, "path", &add_data.sm_path,
+			   N_("path"),
+			   N_("where the new submodule will be cloned to")),
+		OPT_STRING(0, "name", &add_data.sm_name,
+			   N_("string"),
+			   N_("name of the new submodule")),
+		OPT_STRING(0, "url", &add_data.realrepo,
+			   N_("string"),
+			   N_("url where to clone the submodule from")),
+		OPT_STRING(0, "reference", &add_data.reference_path,
+			   N_("repo"),
+			   N_("reference repository")),
+		OPT_BOOL(0, "dissociate", &dissociate,
+			 N_("use --reference only while cloning")),
+		OPT_INTEGER(0, "depth", &add_data.depth,
+			    N_("depth for shallow clones")),
+		OPT_BOOL(0, "progress", &progress,
+			 N_("force cloning progress")),
+		OPT_BOOL('f', "force", &force,
+			 N_("allow adding an otherwise ignored submodule path")),
+		OPT__QUIET(&quiet, "Suppress output for cloning a submodule"),
+		OPT_END()
+	};
+
+	const char *const usage[] = {
+		N_("git submodule--helper add-clone [--prefix=<path>] [--quiet] [--force] "
+		   "[--reference <repository>] [--depth <depth>] [-b|--branch <branch>]"
+		   "[--progress] [--dissociate] --url <url> --path <path> --name <name>"),
+		NULL
+	};
+
+	argc = parse_options(argc, argv, prefix, options, usage, 0);
+
+	add_data.progress = !!progress;
+	add_data.dissociate = !!dissociate;
+	add_data.force = !!force;
+	add_data.quiet = !!quiet;
+
+	if (add_submodule(&add_data))
+		return 1;
+
+	return 0;
+}
+
 #define SUPPORT_SUPER_PREFIX (1<<0)
 
 struct cmd_struct {
@@ -2757,6 +2955,7 @@  static struct cmd_struct commands[] = {
 	{"list", module_list, 0},
 	{"name", module_name, 0},
 	{"clone", module_clone, 0},
+	{"add-clone", add_clone, 0},
 	{"update-module-mode", module_update_module_mode, 0},
 	{"update-clone", update_clone, 0},
 	{"ensure-core-worktree", ensure_core_worktree, 0},
diff --git a/git-submodule.sh b/git-submodule.sh
index 4678378424..f71e1e5495 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -241,43 +241,7 @@  cmd_add()
 		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
 	fi
 
-	# perhaps the path exists and is already a git repo, else clone it
-	if test -e "$sm_path"
-	then
-		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
-		then
-			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
-		else
-			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
-		fi
-
-	else
-		if test -d ".git/modules/$sm_name"
-		then
-			if test -z "$force"
-			then
-				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
-				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
-				die "$(eval_gettextln "\
-If you want to reuse this local git directory instead of cloning again from
-  \$realrepo
-use the '--force' option. If the local git directory is not the correct repo
-or you are unsure what this means choose another name with the '--name' option.")"
-			else
-				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
-			fi
-		fi
-		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
-		(
-			sanitize_submodule_env
-			cd "$sm_path" &&
-			# ash fails to wordsplit ${branch:+-b "$branch"...}
-			case "$branch" in
-			'') git checkout -f -q ;;
-			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
-			esac
-		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
-	fi
+	git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
 	git config submodule."$sm_name".url "$realrepo"
 
 	git add --no-warn-embedded-repo $force "$sm_path" ||