diff mbox series

[19/22] userdiff: fix leaking memory for configured diff drivers

Message ID ef780aa36039560fd069ec97ce87665eb0775200.1722933643.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series Memory leak fixes (pt.4) | expand

Commit Message

Patrick Steinhardt Aug. 6, 2024, 9:01 a.m. UTC
The userdiff structures may be initialized either statically on the
stack or dynamically via configuration keys. In the latter case we end
up leaking memory because we didn't have any infrastructure to discern
those strings which have been allocated statically and those which have
been allocated dynamically.

Refactor the code such that we have two pointers for each of these
strings: one that holds the value as accessed by other subsystems, and
one that points to the same string in case it has been allocated. Like
this, we can safely free the second pointer and thus plug those memory
leaks.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 range-diff.c                     |  6 +++--
 t/t4018-diff-funcname.sh         |  1 +
 t/t4042-diff-textconv-caching.sh |  2 ++
 t/t4048-diff-combined-binary.sh  |  1 +
 t/t4209-log-pickaxe.sh           |  2 ++
 userdiff.c                       | 38 ++++++++++++++++++++++++--------
 userdiff.h                       |  4 ++++
 7 files changed, 43 insertions(+), 11 deletions(-)

Comments

James Liu Aug. 7, 2024, 9:25 a.m. UTC | #1
> Refactor the code such that we have two pointers for each of these
> strings: one that holds the value as accessed by other subsystems, and
> one that points to the same string in case it has been allocated. Like
> this, we can safely free the second pointer and thus plug those memory
> leaks.
>
> diff --git a/userdiff.c b/userdiff.c
> index c4ebb9ff73..989629149f 100644
> --- a/userdiff.c
> +++ b/userdiff.c
> @@ -399,8 +399,11 @@ static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t
>  static int parse_funcname(struct userdiff_funcname *f, const char *k,
>  		const char *v, int cflags)
>  {
> -	if (git_config_string((char **) &f->pattern, k, v) < 0)
> +	f->pattern = NULL;
> +	FREE_AND_NULL(f->pattern_owned);
> +	if (git_config_string(&f->pattern_owned, k, v) < 0)
>  		return -1;
> +	f->pattern = f->pattern_owned;
>  	f->cflags = cflags;
>  	return 0;
>  }

I'm not sure if I understand this change completely. We don't seem to be
using `pattern_owned` (and the other *_owned) fields differently from
their regular counterparts.

Is it because we can't do the following?

        FREE_AND_NULL((char **)f->pattern);
Patrick Steinhardt Aug. 8, 2024, 5:05 a.m. UTC | #2
On Wed, Aug 07, 2024 at 07:25:51PM +1000, James Liu wrote:
> > Refactor the code such that we have two pointers for each of these
> > strings: one that holds the value as accessed by other subsystems, and
> > one that points to the same string in case it has been allocated. Like
> > this, we can safely free the second pointer and thus plug those memory
> > leaks.
> >
> > diff --git a/userdiff.c b/userdiff.c
> > index c4ebb9ff73..989629149f 100644
> > --- a/userdiff.c
> > +++ b/userdiff.c
> > @@ -399,8 +399,11 @@ static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t
> >  static int parse_funcname(struct userdiff_funcname *f, const char *k,
> >  		const char *v, int cflags)
> >  {
> > -	if (git_config_string((char **) &f->pattern, k, v) < 0)
> > +	f->pattern = NULL;
> > +	FREE_AND_NULL(f->pattern_owned);
> > +	if (git_config_string(&f->pattern_owned, k, v) < 0)
> >  		return -1;
> > +	f->pattern = f->pattern_owned;
> >  	f->cflags = cflags;
> >  	return 0;
> >  }
> 
> I'm not sure if I understand this change completely. We don't seem to be
> using `pattern_owned` (and the other *_owned) fields differently from
> their regular counterparts.
> 
> Is it because we can't do the following?
> 
>         FREE_AND_NULL((char **)f->pattern);

Yup. We have a bunch of statically defined userdiff drivers, all of
which use string constants as patterns. We thus cannot reliably free
those and instead have to track the allocated strings in a separate
variable.

Patrick
Junio C Hamano Aug. 8, 2024, 4:05 p.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

>> > -	if (git_config_string((char **) &f->pattern, k, v) < 0)
>> > +	f->pattern = NULL;
>> > +	FREE_AND_NULL(f->pattern_owned);
>> > +	if (git_config_string(&f->pattern_owned, k, v) < 0)
>> >  		return -1;
>> > +	f->pattern = f->pattern_owned;
>> >  	f->cflags = cflags;
>> >  	return 0;
>> >  }
>
> Yup. We have a bunch of statically defined userdiff drivers, all of
> which use string constants as patterns. We thus cannot reliably free
> those and instead have to track the allocated strings in a separate
> variable.

In other words, this is the usual "foo is the variable to be used,
and it may point at foo_to_free, when the value is an allocated
string" pattern.  I doubt .pattern_to_free is a better name even in
the name of consistency---foo_to_free is certainly much better than
foo_owned as a name for a temporary variable in a small scope, but a
structure member is a much longer validity and I am OK if we decide
to adopt the convention to call a structure member .foo_owned when
it is used in this manner.

Thanks.
diff mbox series

Patch

diff --git a/range-diff.c b/range-diff.c
index 5f01605550..bbb0952264 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -450,8 +450,10 @@  static void output_pair_header(struct diff_options *diffopt,
 }
 
 static struct userdiff_driver section_headers = {
-	.funcname = { "^ ## (.*) ##$\n"
-		      "^.?@@ (.*)$", REG_EXTENDED }
+	.funcname = {
+		.pattern = "^ ## (.*) ##$\n^.?@@ (.*)$",
+		.cflags = REG_EXTENDED,
+	},
 };
 
 static struct diff_filespec *get_filespec(const char *name, const char *p)
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index e026fac1f4..8128c30e7f 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -5,6 +5,7 @@ 
 
 test_description='Test custom diff function name patterns'
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'setup' '
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 8ebfa3c1be..a179205394 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -1,6 +1,8 @@ 
 #!/bin/sh
 
 test_description='test textconv caching'
+
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 cat >helper <<'EOF'
diff --git a/t/t4048-diff-combined-binary.sh b/t/t4048-diff-combined-binary.sh
index 0260cf64f5..f399484bce 100755
--- a/t/t4048-diff-combined-binary.sh
+++ b/t/t4048-diff-combined-binary.sh
@@ -4,6 +4,7 @@  test_description='combined and merge diff handle binary files and textconv'
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'setup binary merge conflict' '
diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh
index 64e1623733..b42fdc54fc 100755
--- a/t/t4209-log-pickaxe.sh
+++ b/t/t4209-log-pickaxe.sh
@@ -1,6 +1,8 @@ 
 #!/bin/sh
 
 test_description='log --grep/--author/--regexp-ignore-case/-S/-G'
+
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_log () {
diff --git a/userdiff.c b/userdiff.c
index c4ebb9ff73..989629149f 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -399,8 +399,11 @@  static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t
 static int parse_funcname(struct userdiff_funcname *f, const char *k,
 		const char *v, int cflags)
 {
-	if (git_config_string((char **) &f->pattern, k, v) < 0)
+	f->pattern = NULL;
+	FREE_AND_NULL(f->pattern_owned);
+	if (git_config_string(&f->pattern_owned, k, v) < 0)
 		return -1;
+	f->pattern = f->pattern_owned;
 	f->cflags = cflags;
 	return 0;
 }
@@ -444,20 +447,37 @@  int userdiff_config(const char *k, const char *v)
 		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
 	if (!strcmp(type, "binary"))
 		return parse_tristate(&drv->binary, k, v);
-	if (!strcmp(type, "command"))
-		return git_config_string((char **) &drv->external.cmd, k, v);
+	if (!strcmp(type, "command")) {
+		FREE_AND_NULL(drv->external.cmd);
+		return git_config_string(&drv->external.cmd, k, v);
+	}
 	if (!strcmp(type, "trustexitcode")) {
 		drv->external.trust_exit_code = git_config_bool(k, v);
 		return 0;
 	}
-	if (!strcmp(type, "textconv"))
-		return git_config_string((char **) &drv->textconv, k, v);
+	if (!strcmp(type, "textconv")) {
+		int ret;
+		FREE_AND_NULL(drv->textconv_owned);
+		ret = git_config_string(&drv->textconv_owned, k, v);
+		drv->textconv = drv->textconv_owned;
+		return ret;
+	}
 	if (!strcmp(type, "cachetextconv"))
 		return parse_bool(&drv->textconv_want_cache, k, v);
-	if (!strcmp(type, "wordregex"))
-		return git_config_string((char **) &drv->word_regex, k, v);
-	if (!strcmp(type, "algorithm"))
-		return git_config_string((char **) &drv->algorithm, k, v);
+	if (!strcmp(type, "wordregex")) {
+		int ret;
+		FREE_AND_NULL(drv->word_regex_owned);
+		ret = git_config_string(&drv->word_regex_owned, k, v);
+		drv->word_regex = drv->word_regex_owned;
+		return ret;
+	}
+	if (!strcmp(type, "algorithm")) {
+		int ret;
+		FREE_AND_NULL(drv->algorithm_owned);
+		ret = git_config_string(&drv->algorithm_owned, k, v);
+		drv->algorithm = drv->algorithm_owned;
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/userdiff.h b/userdiff.h
index 7565930337..827361b0bc 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -8,6 +8,7 @@  struct repository;
 
 struct userdiff_funcname {
 	const char *pattern;
+	char *pattern_owned;
 	int cflags;
 };
 
@@ -20,11 +21,14 @@  struct userdiff_driver {
 	const char *name;
 	struct external_diff external;
 	const char *algorithm;
+	char *algorithm_owned;
 	int binary;
 	struct userdiff_funcname funcname;
 	const char *word_regex;
+	char *word_regex_owned;
 	const char *word_regex_multi_byte;
 	const char *textconv;
+	char *textconv_owned;
 	struct notes_cache *textconv_cache;
 	int textconv_want_cache;
 };