diff mbox series

[5/7] grep: allocate subrepos on heap

Message ID f62608e88f125096c1f236a47e3ee670104c7b78.1628618950.git.jonathantanmy@google.com (mailing list archive)
State Superseded
Headers show
Series In grep, no adding submodule ODB as alternates | expand

Commit Message

Jonathan Tan Aug. 10, 2021, 6:28 p.m. UTC
Currently, struct repository objects corresponding to submodules are
allocated on the stack in grep_submodule(). This currently works because
they will not be used once grep_submodule() exits, but a subsequent
patch will require these structs to be accessible for longer (perhaps
even in another thread). Allocate them on the heap and clear them only
at the very end.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 builtin/grep.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

Comments

Emily Shaffer Aug. 11, 2021, 9:50 p.m. UTC | #1
On Tue, Aug 10, 2021 at 11:28:43AM -0700, Jonathan Tan wrote:
> diff --git a/builtin/grep.c b/builtin/grep.c
> index 9e61c7c993..5a40e18e47 100644
> --- a/builtin/grep.c
> +++ b/builtin/grep.c
> @@ -65,6 +65,9 @@ static int todo_done;
>  /* Has all work items been added? */
>  static int all_work_added;
>  
> +static struct repository **repos_to_free;
> +static size_t repos_to_free_nr, repos_to_free_alloc;

One thing I was curious about was whether it would make more sense to
use an existing utility library in Git to handle this. But I think we
kind of are, since ALLOC_GROW is used, so this is idiomatic for Git
project. Ok, fine, I guess this is life at C ;)

> +
>  /* This lock protects all the variables above. */
>  static pthread_mutex_t grep_mutex;
>  
> @@ -168,6 +171,17 @@ static void work_done(struct work_item *w)
>  	grep_unlock();
>  }
>  
> +static void free_repos(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < repos_to_free_nr; i++) {
> +		repo_clear(repos_to_free[i]);
> +		free(repos_to_free[i]);
> +	}
> +	free(repos_to_free);
> +}

Should repos_to_free_nr be reset here? I guess it doesn't make sense to,
since we'd be trying to use-after-free the initial repos_to_free head
pointer too if we wanted to reuse this array.

> +
>  static void *run(void *arg)
>  {
>  	int hit = 0;
> @@ -415,19 +429,24 @@ static int grep_submodule(struct grep_opt *opt,
>  			  const struct object_id *oid,
>  			  const char *filename, const char *path, int cached)
>  {
> -	struct repository subrepo;
> +	struct repository *subrepo;
>  	struct repository *superproject = opt->repo;
>  	const struct submodule *sub;
>  	struct grep_opt subopt;
> -	int hit;
> +	int hit = 0;
>  
>  	sub = submodule_from_path(superproject, null_oid(), path);
>  
>  	if (!is_submodule_active(superproject, path))
>  		return 0;
>  
> -	if (repo_submodule_init(&subrepo, superproject, sub))
> +	subrepo = xmalloc(sizeof(*subrepo));
> +	if (repo_submodule_init(subrepo, superproject, sub)) {
> +		free(subrepo);
>  		return 0;
> +	}
> +	ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc);
> +	repos_to_free[repos_to_free_nr++] = subrepo;

Is this the only place we add to the repos_to_free array? It looks like
yes, so I guess this doesn't need a helper.

>  
>  	/*
>  	 * NEEDSWORK: repo_read_gitmodules() might call
> @@ -438,7 +457,7 @@ static int grep_submodule(struct grep_opt *opt,
>  	 * subrepo's odbs to the in-memory alternates list.
>  	 */
>  	obj_read_lock();
> -	repo_read_gitmodules(&subrepo, 0);
> +	repo_read_gitmodules(subrepo, 0);
>  
>  	/*
>  	 * NEEDSWORK: This adds the submodule's object directory to the list of
> @@ -450,11 +469,11 @@ static int grep_submodule(struct grep_opt *opt,
>  	 * store is no longer global and instead is a member of the repository
>  	 * object.
>  	 */
> -	add_submodule_odb_by_path(subrepo.objects->odb->path);
> +	add_submodule_odb_by_path(subrepo->objects->odb->path);
>  	obj_read_unlock();
>  
>  	memcpy(&subopt, opt, sizeof(subopt));
> -	subopt.repo = &subrepo;
> +	subopt.repo = subrepo;
>  
>  	if (oid) {
>  		enum object_type object_type;
> @@ -464,9 +483,9 @@ static int grep_submodule(struct grep_opt *opt,
>  		struct strbuf base = STRBUF_INIT;
>  
>  		obj_read_lock();
> -		object_type = oid_object_info(&subrepo, oid, NULL);
> +		object_type = oid_object_info(subrepo, oid, NULL);
>  		obj_read_unlock();
> -		data = read_object_with_reference(&subrepo,
> +		data = read_object_with_reference(subrepo,
>  						  oid, tree_type,
>  						  &size, NULL);
>  		if (!data)
> @@ -484,7 +503,6 @@ static int grep_submodule(struct grep_opt *opt,
>  		hit = grep_cache(&subopt, pathspec, cached);
>  	}
>  
> -	repo_clear(&subrepo);
>  	return hit;
>  }
>  
> @@ -1182,5 +1200,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
>  		run_pager(&opt, prefix);
>  	clear_pathspec(&pathspec);
>  	free_grep_patterns(&opt);
> +	free_repos();
>  	return !hit;
>  }

The rest of the diff seems to be a pretty clear object-becomes-reference
conversion, so...

Reviewed-by: Emily Shaffer <emilyshaffer@google.com>
Jonathan Tan Aug. 13, 2021, 4:42 p.m. UTC | #2
> > +static struct repository **repos_to_free;
> > +static size_t repos_to_free_nr, repos_to_free_alloc;
> 
> One thing I was curious about was whether it would make more sense to
> use an existing utility library in Git to handle this. But I think we
> kind of are, since ALLOC_GROW is used, so this is idiomatic for Git
> project. Ok, fine, I guess this is life at C ;)

Yeah, this *is* the utility library :-)

> > +static void free_repos(void)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < repos_to_free_nr; i++) {
> > +		repo_clear(repos_to_free[i]);
> > +		free(repos_to_free[i]);
> > +	}
> > +	free(repos_to_free);
> > +}
> 
> Should repos_to_free_nr be reset here? I guess it doesn't make sense to,
> since we'd be trying to use-after-free the initial repos_to_free head
> pointer too if we wanted to reuse this array.

Hmm...I guess that if I'm going through the trouble of clearing the
repos instead of just letting all the memory be collected at the end of
a process, I should also reset _nr and _alloc. I'll make that change.
diff mbox series

Patch

diff --git a/builtin/grep.c b/builtin/grep.c
index 9e61c7c993..5a40e18e47 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -65,6 +65,9 @@  static int todo_done;
 /* Has all work items been added? */
 static int all_work_added;
 
+static struct repository **repos_to_free;
+static size_t repos_to_free_nr, repos_to_free_alloc;
+
 /* This lock protects all the variables above. */
 static pthread_mutex_t grep_mutex;
 
@@ -168,6 +171,17 @@  static void work_done(struct work_item *w)
 	grep_unlock();
 }
 
+static void free_repos(void)
+{
+	int i;
+
+	for (i = 0; i < repos_to_free_nr; i++) {
+		repo_clear(repos_to_free[i]);
+		free(repos_to_free[i]);
+	}
+	free(repos_to_free);
+}
+
 static void *run(void *arg)
 {
 	int hit = 0;
@@ -415,19 +429,24 @@  static int grep_submodule(struct grep_opt *opt,
 			  const struct object_id *oid,
 			  const char *filename, const char *path, int cached)
 {
-	struct repository subrepo;
+	struct repository *subrepo;
 	struct repository *superproject = opt->repo;
 	const struct submodule *sub;
 	struct grep_opt subopt;
-	int hit;
+	int hit = 0;
 
 	sub = submodule_from_path(superproject, null_oid(), path);
 
 	if (!is_submodule_active(superproject, path))
 		return 0;
 
-	if (repo_submodule_init(&subrepo, superproject, sub))
+	subrepo = xmalloc(sizeof(*subrepo));
+	if (repo_submodule_init(subrepo, superproject, sub)) {
+		free(subrepo);
 		return 0;
+	}
+	ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc);
+	repos_to_free[repos_to_free_nr++] = subrepo;
 
 	/*
 	 * NEEDSWORK: repo_read_gitmodules() might call
@@ -438,7 +457,7 @@  static int grep_submodule(struct grep_opt *opt,
 	 * subrepo's odbs to the in-memory alternates list.
 	 */
 	obj_read_lock();
-	repo_read_gitmodules(&subrepo, 0);
+	repo_read_gitmodules(subrepo, 0);
 
 	/*
 	 * NEEDSWORK: This adds the submodule's object directory to the list of
@@ -450,11 +469,11 @@  static int grep_submodule(struct grep_opt *opt,
 	 * store is no longer global and instead is a member of the repository
 	 * object.
 	 */
-	add_submodule_odb_by_path(subrepo.objects->odb->path);
+	add_submodule_odb_by_path(subrepo->objects->odb->path);
 	obj_read_unlock();
 
 	memcpy(&subopt, opt, sizeof(subopt));
-	subopt.repo = &subrepo;
+	subopt.repo = subrepo;
 
 	if (oid) {
 		enum object_type object_type;
@@ -464,9 +483,9 @@  static int grep_submodule(struct grep_opt *opt,
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
-		object_type = oid_object_info(&subrepo, oid, NULL);
+		object_type = oid_object_info(subrepo, oid, NULL);
 		obj_read_unlock();
-		data = read_object_with_reference(&subrepo,
+		data = read_object_with_reference(subrepo,
 						  oid, tree_type,
 						  &size, NULL);
 		if (!data)
@@ -484,7 +503,6 @@  static int grep_submodule(struct grep_opt *opt,
 		hit = grep_cache(&subopt, pathspec, cached);
 	}
 
-	repo_clear(&subrepo);
 	return hit;
 }
 
@@ -1182,5 +1200,6 @@  int cmd_grep(int argc, const char **argv, const char *prefix)
 		run_pager(&opt, prefix);
 	clear_pathspec(&pathspec);
 	free_grep_patterns(&opt);
+	free_repos();
 	return !hit;
 }