diff mbox series

[v3,1/3] dir: change the scope of function 'directory_exists_in_index()'

Message ID 20201214231939.644175-2-periperidip@gmail.com (mailing list archive)
State New, archived
Headers show
Series submodule: port subcommand add from shell to C | expand

Commit Message

Shourya Shukla Dec. 14, 2020, 11:19 p.m. UTC
Change the scope of the function 'directory_exists_in_index()' as well
as declare it in 'dir.h'.

Since the return type of the function is the enumerator 'exist_status',
change its scope as well and declare it in 'dir.h'. While at it, rename
the members of the aforementioned enum so as to avoid any naming clashes
or confusions later on.

Helped-by: Christian Couder <christian.couder@gmail.com>
Helped-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
Signed-off-by: Shourya Shukla <periperidip@gmail.com>
---
 builtin/submodule--helper.c | 408 ++++++++++++++++++++++++++++++++++++
 dir.c                       |  30 ++-
 dir.h                       |   9 +
 3 files changed, 429 insertions(+), 18 deletions(-)

Comments

Johannes Schindelin Dec. 19, 2020, 12:08 a.m. UTC | #1
Hi Shourya,

On Tue, 15 Dec 2020, Shourya Shukla wrote:

> Change the scope of the function 'directory_exists_in_index()' as well
> as declare it in 'dir.h'.
>
> Since the return type of the function is the enumerator 'exist_status',
> change its scope as well and declare it in 'dir.h'. While at it, rename
> the members of the aforementioned enum so as to avoid any naming clashes
> or confusions later on.

This makes it sound as if only existing code was adjusted, in a minimal
way, but no new code was introduced. But that's not true:

>
> Helped-by: Christian Couder <christian.couder@gmail.com>
> Helped-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
> Signed-off-by: Shourya Shukla <periperidip@gmail.com>
> ---
>  builtin/submodule--helper.c | 408 ++++++++++++++++++++++++++++++++++++
>  dir.c                       |  30 ++-
>  dir.h                       |   9 +
>  3 files changed, 429 insertions(+), 18 deletions(-)

Tons of new code there. And unfortunately...

>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index c30896c897..4dfad35d77 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2744,6 +2744,414 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
>  	return !!ret;
>  }
>
> +struct add_data {
> +	const char *prefix;
> +	const char *branch;
> +	const char *reference_path;
> +	const char *sm_path;
> +	const char *sm_name;
> +	const char *repo;
> +	const char *realrepo;
> +	int depth;
> +	unsigned int force: 1;
> +	unsigned int quiet: 1;
> +	unsigned int progress: 1;
> +	unsigned int dissociate: 1;
> +};
> +#define ADD_DATA_INIT { 0 }
> +
> +/*
> + * Guess the directory name from the repository URL by performing the
> + * operations below in the following order:
> + *
> + * - If the URL ends with '/', remove that.
> + *
> + * - If the result of the above ends with zero or more ':', followed
> + *  by zero or more '/', followed by ".git", drop the matching part.
> + *
> + * - If the result of the above has '/' or ':' in it, remove everything
> + *  before it and '/' or ':' itself.
> + */
> +static char *guess_dir_name(const char *repo)
> +{
> +	const char *start, *end;
> +
> +	start = repo;
> +	end = repo + strlen(repo);
> +
> +	/* remove the trailing '/' */
> +	if (repo < end - 1 && end[-1] == '/')
> +		end--;
> +
> +	/* remove the trailing ':', '/' and '.git' */
> +	if (repo < end - 4 && !memcmp(".git", end - 4, 4)) {
> +		end -= 4;
> +		while (repo < end - 1 && end[-1] == '/')
> +			end--;
> +		while (repo < end - 1 && end[-1] == ':')
> +			end--;
> +	}
> +
> +	/* find the last ':' or '/' */
> +	for (start = end - 1; repo <= start; start--) {
> +		if (*start == '/' || *start == ':')
> +			break;
> +	}
> +	/* exclude '/' or ':' itself */
> +	start++;
> +
> +	return xmemdupz(start, end - start);
> +}
> +
> +static int can_create_submodule(unsigned int force, const char *path)
> +{
> +	int cache_pos, dir_in_cache = 0;
> +
> +	if (read_cache() < 0)
> +		die(_("index file corrupt"));
> +
> +	cache_pos = cache_name_pos(path, strlen(path));
> +	if(cache_pos < 0 &&
> +	   directory_exists_in_index(&the_index, path, strlen(path)) == is_cache_directory)
> +		dir_in_cache = 1;
> +
> +	if (!force) {
> +		if (cache_pos >= 0 || dir_in_cache)
> +			die(_("'%s' already exists in the index"), path);
> +	} else {
> +		struct cache_entry *ce = NULL;
> +		if (cache_pos >= 0)
> +			ce = the_index.cache[cache_pos];
> +		if (dir_in_cache || (ce && !S_ISGITLINK(ce->ce_mode)))
> +			die(_("'%s' already exists in the index and is not a "
> +			      "submodule"), path);
> +	}
> +	return 0;
> +}
> +
> +static const char *parse_token(const char *cp, int *len)
> +{
> +	const char *p = cp, *start, *end;
> +	char *str;
> +
> +	start = p;
> +	while (*p != ' ')
> +		p++;
> +	end = p;
> +	str = xstrndup(start, end - start);
> +
> +	while(*p == ' ')
> +		p++;
> +
> +	return str;
> +}

This function is not careful enough to avoid buffer overruns. It even
triggers a segmentation fault in our test suite:
https://github.com/gitgitgadget/git/runs/1574891976?check_suite_focus=true#step:6:3152

I need this to make it pass (only tested locally so far, but I trust you
to take the baton from here):

-- snipsnap --
From c28c0cd3ac21d546394335957fbaa350ab287c3f Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 19 Dec 2020 01:02:04 +0100
Subject: [PATCH] fixup??? dir: change the scope of function
 'directory_exists_in_index()'

This fixes the segmentation fault reported in the linux-musl job of our
CI builds. Valgrind has this to say about it:

==32354==
==32354== Process terminating with default action of signal 11 (SIGSEGV)
==32354==  Access not within mapped region at address 0x5C73000
==32354==    at 0x202F5A: parse_token (submodule--helper.c:2837)
==32354==    by 0x20319B: report_fetch_remotes (submodule--helper.c:2871)
==32354==    by 0x2033FD: add_submodule (submodule--helper.c:2898)
==32354==    by 0x204612: module_add (submodule--helper.c:3146)
==32354==    by 0x20478A: cmd_submodule__helper (submodule--helper.c:3202)
==32354==    by 0x12655E: run_builtin (git.c:458)
==32354==    by 0x1269B4: handle_builtin (git.c:712)
==32354==    by 0x126C79: run_argv (git.c:779)
==32354==    by 0x12715C: cmd_main (git.c:913)
==32354==    by 0x2149A2: main (common-main.c:52)

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/submodule--helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 4f1d892b9a9..29a6f80b937 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2834,12 +2834,12 @@ static const char *parse_token(const char *cp, int *len)
 	char *str;

 	start = p;
-	while (*p != ' ')
+	while (*p && *p != ' ')
 		p++;
 	end = p;
 	str = xstrndup(start, end - start);

-	while(*p == ' ')
+	while(*p && *p == ' ')
 		p++;

 	return str;
--
2.29.2.windows.1.1.g3464b98ce68
Junio C Hamano Dec. 19, 2020, 12:47 a.m. UTC | #2
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Hi Shourya,
>
> On Tue, 15 Dec 2020, Shourya Shukla wrote:
>
>> Change the scope of the function 'directory_exists_in_index()' as well
>> as declare it in 'dir.h'.
>>
>> Since the return type of the function is the enumerator 'exist_status',
>> change its scope as well and declare it in 'dir.h'. While at it, rename
>> the members of the aforementioned enum so as to avoid any naming clashes
>> or confusions later on.
>
> This makes it sound as if only existing code was adjusted, in a minimal
> way, but no new code was introduced. But that's not true:

I noticed it last night, too---I suspect it was a mistake made while
shuffling changes across steps with rebase -i.

>> Helped-by: Christian Couder <christian.couder@gmail.com>
>> Helped-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
>> Signed-off-by: Shourya Shukla <periperidip@gmail.com>
>> ---
>>  builtin/submodule--helper.c | 408 ++++++++++++++++++++++++++++++++++++
>>  dir.c                       |  30 ++-
>>  dir.h                       |   9 +
>>  3 files changed, 429 insertions(+), 18 deletions(-)
>
> Tons of new code there. And unfortunately...

>> +static const char *parse_token(const char *cp, int *len)
>> +{
>> +	const char *p = cp, *start, *end;
>> +	char *str;
>> +
>> +	start = p;
>> +	while (*p != ' ')
>> +		p++;
>> +	end = p;
>> +	str = xstrndup(start, end - start);
>> +
>> +	while(*p == ' ')
>> +		p++;
>> +
>> +	return str;
>> +}
>
> This function is not careful enough to avoid buffer overruns. It even
> triggers a segmentation fault in our test suite:
> https://github.com/gitgitgadget/git/runs/1574891976?check_suite_focus=true#step:6:3152

I notice that len is not used at all ;-)

> I need this to make it pass (only tested locally so far, but I trust you
> to take the baton from here):
>
> -- snipsnap --
> From c28c0cd3ac21d546394335957fbaa350ab287c3f Mon Sep 17 00:00:00 2001
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> Date: Sat, 19 Dec 2020 01:02:04 +0100
> Subject: [PATCH] fixup??? dir: change the scope of function
>  'directory_exists_in_index()'
>
> This fixes the segmentation fault reported in the linux-musl job of our
> CI builds. Valgrind has this to say about it:
>
> ==32354==
> ==32354== Process terminating with default action of signal 11 (SIGSEGV)
> ==32354==  Access not within mapped region at address 0x5C73000
> ==32354==    at 0x202F5A: parse_token (submodule--helper.c:2837)
> ==32354==    by 0x20319B: report_fetch_remotes (submodule--helper.c:2871)
> ==32354==    by 0x2033FD: add_submodule (submodule--helper.c:2898)
> ==32354==    by 0x204612: module_add (submodule--helper.c:3146)
> ==32354==    by 0x20478A: cmd_submodule__helper (submodule--helper.c:3202)
> ==32354==    by 0x12655E: run_builtin (git.c:458)
> ==32354==    by 0x1269B4: handle_builtin (git.c:712)
> ==32354==    by 0x126C79: run_argv (git.c:779)
> ==32354==    by 0x12715C: cmd_main (git.c:913)
> ==32354==    by 0x2149A2: main (common-main.c:52)
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>  builtin/submodule--helper.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index 4f1d892b9a9..29a6f80b937 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2834,12 +2834,12 @@ static const char *parse_token(const char *cp, int *len)
>  	char *str;
>
>  	start = p;
> -	while (*p != ' ')
> +	while (*p && *p != ' ')
>  		p++;
>  	end = p;
>  	str = xstrndup(start, end - start);
>
> -	while(*p == ' ')
> +	while(*p && *p == ' ')
>  		p++;
>
>  	return str;
> --
> 2.29.2.windows.1.1.g3464b98ce68
diff mbox series

Patch

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index c30896c897..4dfad35d77 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2744,6 +2744,414 @@  static int module_set_branch(int argc, const char **argv, const char *prefix)
 	return !!ret;
 }
 
+struct add_data {
+	const char *prefix;
+	const char *branch;
+	const char *reference_path;
+	const char *sm_path;
+	const char *sm_name;
+	const char *repo;
+	const char *realrepo;
+	int depth;
+	unsigned int force: 1;
+	unsigned int quiet: 1;
+	unsigned int progress: 1;
+	unsigned int dissociate: 1;
+};
+#define ADD_DATA_INIT { 0 }
+
+/*
+ * Guess the directory name from the repository URL by performing the
+ * operations below in the following order:
+ *
+ * - If the URL ends with '/', remove that.
+ *
+ * - If the result of the above ends with zero or more ':', followed
+ *  by zero or more '/', followed by ".git", drop the matching part.
+ *
+ * - If the result of the above has '/' or ':' in it, remove everything
+ *  before it and '/' or ':' itself.
+ */
+static char *guess_dir_name(const char *repo)
+{
+	const char *start, *end;
+
+	start = repo;
+	end = repo + strlen(repo);
+
+	/* remove the trailing '/' */
+	if (repo < end - 1 && end[-1] == '/')
+		end--;
+
+	/* remove the trailing ':', '/' and '.git' */
+	if (repo < end - 4 && !memcmp(".git", end - 4, 4)) {
+		end -= 4;
+		while (repo < end - 1 && end[-1] == '/')
+			end--;
+		while (repo < end - 1 && end[-1] == ':')
+			end--;
+	}
+
+	/* find the last ':' or '/' */
+	for (start = end - 1; repo <= start; start--) {
+		if (*start == '/' || *start == ':')
+			break;
+	}
+	/* exclude '/' or ':' itself */
+	start++;
+
+	return xmemdupz(start, end - start);
+}
+
+static int can_create_submodule(unsigned int force, const char *path)
+{
+	int cache_pos, dir_in_cache = 0;
+
+	if (read_cache() < 0)
+		die(_("index file corrupt"));
+
+	cache_pos = cache_name_pos(path, strlen(path));
+	if(cache_pos < 0 &&
+	   directory_exists_in_index(&the_index, path, strlen(path)) == is_cache_directory)
+		dir_in_cache = 1;
+
+	if (!force) {
+		if (cache_pos >= 0 || dir_in_cache)
+			die(_("'%s' already exists in the index"), path);
+	} else {
+		struct cache_entry *ce = NULL;
+		if (cache_pos >= 0)
+			ce = the_index.cache[cache_pos];
+		if (dir_in_cache || (ce && !S_ISGITLINK(ce->ce_mode)))
+			die(_("'%s' already exists in the index and is not a "
+			      "submodule"), path);
+	}
+	return 0;
+}
+
+static const char *parse_token(const char *cp, int *len)
+{
+	const char *p = cp, *start, *end;
+	char *str;
+
+	start = p;
+	while (*p != ' ')
+		p++;
+	end = p;
+	str = xstrndup(start, end - start);
+
+	while(*p == ' ')
+		p++;
+
+	return str;
+}
+
+static void report_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir)
+{
+	struct child_process cp_rem = CHILD_PROCESS_INIT;
+	struct strbuf sb_rem = STRBUF_INIT;
+
+	cp_rem.git_cmd = 1;
+	fprintf(stderr, _("A git directory for '%s' is "
+		"found locally with remote(s):\n"), sm_name);
+	strvec_pushf(&cp_rem.env_array,
+		     "GIT_DIR=%s", git_dir);
+	strvec_push(&cp_rem.env_array, "GIT_WORK_TREE=.");
+	strvec_pushl(&cp_rem.args, "remote", "-v", NULL);
+	if (!capture_command(&cp_rem, &sb_rem, 0)) {
+		int i;
+
+		for (i = 0; i < sb_rem.len; i++) {
+			char *start = sb_rem.buf + i, *end = start;
+			const char *name = start, *url, *tail;
+			int namelen, urllen;
+
+			while (sb_rem.buf[i++] != '\n')
+				end++;
+			url = parse_token(name, &namelen);
+			tail = parse_token(url, &urllen);
+			if (!memcmp(tail, "(fetch)", 7))
+				fprintf(stderr, "  %s\t%s\n", name, url);
+			start = *end ? end + 1 : end;
+		}
+	}
+}
+
+static int add_submodule(struct add_data *info)
+{
+	/* perhaps the path exists and is already a git repo, else clone it */
+	if (is_directory(info->sm_path)) {
+		char *sub_git_path = xstrfmt("%s/.git", info->sm_path);
+		if (is_directory(sub_git_path) || file_exists(sub_git_path))
+			printf(_("Adding existing repo at '%s' to the index\n"),
+				 info->sm_path);
+		else
+			die(_("'%s' already exists and is not a valid git repo"),
+			      info->sm_path);
+		free(sub_git_path);
+	} else {
+		struct strvec clone_args = STRVEC_INIT;
+		struct child_process cp = CHILD_PROCESS_INIT;
+		char *submodule_git_dir = xstrfmt(".git/modules/%s", info->sm_name);
+
+		if (is_directory(submodule_git_dir)) {
+			if (!info->force) {
+				report_fetch_remotes(stderr, info->sm_name,
+						     submodule_git_dir);
+				error(_("If you want to reuse this local git "
+				      "directory instead of cloning again from\n "
+				      "  %s\n"
+				      "use the '--force' option. If the local "
+				      "git directory is not the correct repo\n"
+				      "or you are unsure what this means choose "
+				      "another name with the '--name' option."),
+				      info->realrepo);
+				return 1;
+			} else {
+				printf(_("Reactivating local git directory for "
+					 "submodule '%s'."), info->sm_path);
+			}
+		}
+		free(submodule_git_dir);
+
+		strvec_push(&clone_args, "clone");
+
+		if (info->quiet)
+			strvec_push(&clone_args, "--quiet");
+
+		if (info->progress)
+			strvec_push(&clone_args, "--progress");
+
+		if (info->prefix)
+			strvec_pushl(&clone_args, "--prefix", info->prefix, NULL);
+		strvec_pushl(&clone_args, "--path", info->sm_path, "--name",
+			     info->sm_name, "--url", info->realrepo, NULL);
+		if (info->reference_path)
+			strvec_pushl(&clone_args, "--reference",
+				     info->reference_path, NULL);
+		if (info->dissociate)
+			strvec_push(&clone_args, "--dissociate");
+
+		if (info->depth >= 0)
+			strvec_pushf(&clone_args, "--depth=%d", info->depth);
+
+		if (module_clone(clone_args.nr, clone_args.v, info->prefix)) {
+			strvec_clear(&clone_args);
+			return -1;
+		}
+
+		prepare_submodule_repo_env(&cp.env_array);
+		cp.git_cmd = 1;
+		cp.dir = info->sm_path;
+		strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL);
+
+		if (info->branch) {
+			strvec_pushl(&cp.args, "-B", info->branch, NULL);
+			strvec_pushf(&cp.args, "origin/%s", info->branch);
+		}
+
+		if (run_command(&cp))
+			die(_("unable to checkout submodule '%s'"), info->sm_path);
+	}
+	return 0;
+}
+
+static void config_added_submodule(struct add_data *info)
+{
+	char *key, *var = NULL;
+	struct child_process cp = CHILD_PROCESS_INIT;
+	struct child_process cp2 = CHILD_PROCESS_INIT;
+
+	key = xstrfmt("submodule.%s.url", info->sm_name);
+	git_config_set_gently(key, info->realrepo);
+	free(key);
+
+	cp.git_cmd = 1;
+	strvec_pushl(&cp.args, "add", "--no-warn-embedded-repo", NULL);
+	if (info->force)
+		strvec_push(&cp.args, "--force");
+	strvec_pushl(&cp.args, "--", info->sm_path, NULL);
+
+	if (run_command(&cp))
+		die(_("failed to add submodule '%s'"), info->sm_path);
+
+	key = xstrfmt("submodule.%s.path", info->sm_name);
+	config_set_in_gitmodules_file_gently(key, info->sm_path);
+	free(key);
+	key = xstrfmt("submodule.%s.url", info->sm_name);
+	config_set_in_gitmodules_file_gently(key, info->repo);
+	free(key);
+	key = xstrfmt("submodule.%s.branch", info->sm_name);
+	if (info->branch)
+		config_set_in_gitmodules_file_gently(key, info->branch);
+	free(key);
+
+	cp2.git_cmd = 1;
+	strvec_pushl(&cp2.args, "add", "--force", NULL);
+	strvec_pushl(&cp2.args, "--", ".gitmodules", NULL);
+
+	if (run_command(&cp2))
+		die(_("Failed to register submodule '%s'"), info->sm_path);
+
+	/*
+	 * NEEDSWORK: In a multi-working-tree world, this needs to be
+	 * set in the per-worktree config.
+	 */
+	if (!git_config_get_string("submodule.active", &var) && var) {
+
+		/*
+		 * If the submodule being adding isn't already covered by the
+		 * current configured pathspec, set the submodule's active flag
+		 */
+		if (!is_submodule_active(the_repository, info->sm_path)) {
+			key = xstrfmt("submodule.%s.active", info->sm_name);
+			git_config_set_gently(key, "true");
+			free(key);
+		}
+	} else {
+		key = xstrfmt("submodule.%s.active", info->sm_name);
+		git_config_set_gently(key, "true");
+		free(key);
+	}
+}
+
+static int module_add(int argc, const char **argv, const char *prefix)
+{
+	const char *branch = NULL, *custom_name = NULL, *realrepo = NULL;
+	const char *reference_path = NULL, *repo = NULL, *name = NULL;
+	char *sm_path;
+	int force = 0, quiet = 0, depth = -1, progress = 0, dissociate = 0;
+	struct add_data info = ADD_DATA_INIT;
+	struct strbuf sb = STRBUF_INIT;
+
+	struct option options[] = {
+		OPT_STRING('b', "branch", &branch, N_("branch"),
+			   N_("branch of repository to add as submodule")),
+		OPT_BOOL('f', "force", &force, N_("allow adding an otherwise "
+						  "ignored submodule path")),
+		OPT__QUIET(&quiet, N_("print only error messages")),
+		OPT_BOOL(0, "progress", &progress, N_("force cloning progress")),
+		OPT_STRING(0, "reference", &reference_path, N_("repository"),
+			   N_("reference repository")),
+		OPT_BOOL(0, "dissociate", &dissociate, N_("borrow the objects from reference repositories")),
+		OPT_STRING(0, "name", &custom_name, N_("name"),
+			   N_("sets the submodule’s name to the given string "
+			      "instead of defaulting to its path")),
+		OPT_INTEGER(0, "depth", &depth, N_("depth for shallow clones")),
+		OPT_END()
+	};
+
+	const char *const usage[] = {
+		N_("git submodule--helper add [<options>] [--] [<path>]"),
+		NULL
+	};
+
+	argc = parse_options(argc, argv, prefix, options, usage, 0);
+
+	if (!is_writing_gitmodules_ok())
+		die(_("please make sure that the .gitmodules file is in the working tree"));
+
+	if (prefix && *prefix && reference_path &&
+	    !is_absolute_path(reference_path))
+		reference_path = xstrfmt("%s%s", prefix, reference_path);
+
+	if (argc == 0 || argc > 2) {
+		usage_with_options(usage, options);
+	} else if (argc == 1) {
+		repo = argv[0];
+		sm_path = guess_dir_name(repo);
+	} else {
+		repo = argv[0];
+		sm_path = xstrdup(argv[1]);
+	}
+
+	if (prefix && *prefix && !is_absolute_path(sm_path))
+		sm_path = xstrfmt("%s%s", prefix, sm_path);
+
+	/* assure repo is absolute or relative to parent */
+	if (starts_with_dot_dot_slash(repo) || starts_with_dot_slash(repo)) {
+		char *remote = get_default_remote();
+		char *remoteurl;
+		struct strbuf sb = STRBUF_INIT;
+
+		if (prefix)
+			die(_("relative path can only be used from the toplevel "
+			      "of the working tree"));
+		/* dereference source url relative to parent's url */
+		strbuf_addf(&sb, "remote.%s.url", remote);
+		if (git_config_get_string(sb.buf, &remoteurl))
+			remoteurl = xgetcwd();
+		realrepo = relative_url(remoteurl, repo, NULL);
+
+		free(remoteurl);
+		free(remote);
+	} else if (is_dir_sep(repo[0]) || strchr(repo, ':')) {
+		realrepo = repo;
+	} else {
+		die(_("repo URL: '%s' must be absolute or begin with ./|../"),
+		      repo);
+	}
+
+	/*
+	 * normalize path:
+	 * multiple //; leading ./; /./; /../;
+	 */
+	normalize_path_copy(sm_path, sm_path);
+	/* strip trailing '/' */
+	if (is_dir_sep(sm_path[strlen(sm_path) -1]))
+		sm_path[strlen(sm_path) - 1] = '\0';
+
+	if (can_create_submodule(force, sm_path))
+		return 1;
+
+	strbuf_addstr(&sb, sm_path);
+	if (is_nonbare_repository_dir(&sb)) {
+		struct object_id oid;
+		if (resolve_gitlink_ref(sm_path, "HEAD", &oid) < 0)
+			die(_("'%s' does not have a commit checked out"), sm_path);
+	}
+
+	if (!force) {
+		int exit_code = -1;
+		struct strbuf sb = STRBUF_INIT;
+		struct child_process cp = CHILD_PROCESS_INIT;
+		cp.git_cmd = 1;
+		cp.no_stdout = 1;
+		strvec_pushl(&cp.args, "add", "--dry-run", "--ignore-missing",
+			     "--no-warn-embedded-repo", sm_path, NULL);
+		if ((exit_code = pipe_command(&cp, NULL, 0, NULL, 0, &sb, 0))) {
+			strbuf_complete_line(&sb);
+			fputs(sb.buf, stderr);
+			return exit_code;
+		}
+		strbuf_release(&sb);
+	}
+
+	name = custom_name ? custom_name : sm_path;
+	if (check_submodule_name(name))
+		die(_("'%s' is not a valid submodule name"), name);
+
+	info.prefix = prefix;
+	info.sm_name = name;
+	info.sm_path = sm_path;
+	info.repo = repo;
+	info.realrepo = realrepo;
+	info.reference_path = reference_path;
+	info.branch = branch;
+	info.depth = depth;
+	info.progress = !!progress;
+	info.dissociate = !!dissociate;
+	info.force = !!force;
+	info.quiet = !!quiet;
+
+	if (add_submodule(&info))
+		return 1;
+	config_added_submodule(&info);
+	free(sm_path);
+
+	return 0;
+}
+
 #define SUPPORT_SUPER_PREFIX (1<<0)
 
 struct cmd_struct {
diff --git a/dir.c b/dir.c
index d637461da5..f37de276f2 100644
--- a/dir.c
+++ b/dir.c
@@ -1655,12 +1655,6 @@  struct dir_entry *dir_add_ignored(struct dir_struct *dir,
 	return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 }
 
-enum exist_status {
-	index_nonexistent = 0,
-	index_directory,
-	index_gitdir
-};
-
 /*
  * Do not use the alphabetically sorted index to look up
  * the directory name; instead, use the case insensitive
@@ -1672,13 +1666,13 @@  static enum exist_status directory_exists_in_index_icase(struct index_state *ist
 	struct cache_entry *ce;
 
 	if (index_dir_exists(istate, dirname, len))
-		return index_directory;
+		return is_cache_directory;
 
 	ce = index_file_exists(istate, dirname, len, ignore_case);
 	if (ce && S_ISGITLINK(ce->ce_mode))
-		return index_gitdir;
+		return is_cache_gitdir;
 
-	return index_nonexistent;
+	return is_cache_absent;
 }
 
 /*
@@ -1688,8 +1682,8 @@  static enum exist_status directory_exists_in_index_icase(struct index_state *ist
  * the files it contains) will sort with the '/' at the
  * end.
  */
-static enum exist_status directory_exists_in_index(struct index_state *istate,
-						   const char *dirname, int len)
+enum exist_status directory_exists_in_index(struct index_state *istate,
+					    const char *dirname, int len)
 {
 	int pos;
 
@@ -1709,11 +1703,11 @@  static enum exist_status directory_exists_in_index(struct index_state *istate,
 		if (endchar > '/')
 			break;
 		if (endchar == '/')
-			return index_directory;
+			return is_cache_directory;
 		if (!endchar && S_ISGITLINK(ce->ce_mode))
-			return index_gitdir;
+			return is_cache_gitdir;
 	}
-	return index_nonexistent;
+	return is_cache_absent;
 }
 
 /*
@@ -1767,11 +1761,11 @@  static enum path_treatment treat_directory(struct dir_struct *dir,
 	/* The "len-1" is to strip the final '/' */
 	enum exist_status status = directory_exists_in_index(istate, dirname, len-1);
 
-	if (status == index_directory)
+	if (status == is_cache_directory)
 		return path_recurse;
-	if (status == index_gitdir)
+	if (status == is_cache_gitdir)
 		return path_none;
-	if (status != index_nonexistent)
+	if (status != is_cache_absent)
 		BUG("Unhandled value for directory_exists_in_index: %d\n", status);
 
 	/*
@@ -2190,7 +2184,7 @@  static enum path_treatment treat_path(struct dir_struct *dir,
 	if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
 	    (dtype == DT_DIR) &&
 	    !has_path_in_index &&
-	    (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
+	    (directory_exists_in_index(istate, path->buf, path->len) == is_cache_absent))
 		return path_none;
 
 	excluded = is_excluded(dir, istate, path->buf, &dtype);
diff --git a/dir.h b/dir.h
index a3c40dec51..af817a21b2 100644
--- a/dir.h
+++ b/dir.h
@@ -370,6 +370,15 @@  int read_directory(struct dir_struct *, struct index_state *istate,
 		   const char *path, int len,
 		   const struct pathspec *pathspec);
 
+enum exist_status {
+	is_cache_absent = 0,
+	is_cache_directory,
+	is_cache_gitdir
+};
+
+enum exist_status directory_exists_in_index(struct index_state *istate,
+					    const char *dirname, int len);
+
 enum pattern_match_result {
 	UNDECIDED = -1,
 	NOT_MATCHED = 0,