diff mbox series

[RFC,v5,2/3] grep: make PCRE2 aware of custom allocator

Message ID 20190809030210.18353-3-carenas@gmail.com (mailing list archive)
State New, archived
Headers show
Series grep: almost no more leaks, hopefully no crashes | expand

Commit Message

Carlo Marcelo Arenas Belón Aug. 9, 2019, 3:02 a.m. UTC
94da9193a6 (grep: add support for PCRE v2, 2017-06-01) didn't include
a way to override the system allocator, and so it is incompatible with
USE_NED_ALLOCATOR.  The problem was made visible when an attempt to
avoid a leak in a data structure that is created by the library was
passed to NED's free for disposal triggering a segfault in Windows.

PCRE2 requires the use of a general context to override the allocator
and therefore, there is a lot more code needed than in PCRE1, including
a couple of wrapper functions.

Extend the grep API with a "destructor" that could be called to cleanup
any objects that were created and used globally.

Update builtin/grep to use that new API, but any other future
users should make sure to have matching grep_init/grep_destroy calls
if they are using the pattern matching functionality (currently only
relevant when using both NED and PCRE2)

Move the logic to decide if a general context will be needed to an
earlier phase so it will only be done once per pattern (instead of
at least once per worker thread) avoiding then the need for locking.

This change does the minimum change required to hopefully solve the
crash, with the rest of the users of it added later.

Helped-by: René Scharfe <l.s.r@web.de>
Reported-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
---
 builtin/grep.c |  1 +
 grep.c         | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-
 grep.h         |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

Comments

Johannes Schindelin Aug. 27, 2019, 9:07 a.m. UTC | #1
Hi Carlo,

On Thu, 8 Aug 2019, Carlo Marcelo Arenas Belón wrote:

> 94da9193a6 (grep: add support for PCRE v2, 2017-06-01) didn't include
> a way to override the system allocator, and so it is incompatible with
> USE_NED_ALLOCATOR.  The problem was made visible when an attempt to
> avoid a leak in a data structure that is created by the library was
> passed to NED's free for disposal triggering a segfault in Windows.
>
> PCRE2 requires the use of a general context to override the allocator
> and therefore, there is a lot more code needed than in PCRE1, including
> a couple of wrapper functions.
>
> Extend the grep API with a "destructor" that could be called to cleanup
> any objects that were created and used globally.
>
> Update builtin/grep to use that new API, but any other future
> users should make sure to have matching grep_init/grep_destroy calls
> if they are using the pattern matching functionality (currently only
> relevant when using both NED and PCRE2)
>
> Move the logic to decide if a general context will be needed to an
> earlier phase so it will only be done once per pattern (instead of
> at least once per worker thread) avoiding then the need for locking.
>
> This change does the minimum change required to hopefully solve the
> crash, with the rest of the users of it added later.
>
> Helped-by: René Scharfe <l.s.r@web.de>
> Reported-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
> ---

Unfortunately, this is _still_ incorrect.

I pointed out multiple times that custom allocators can be activated at
run-time rather than compile-time, therefore making the choice at
compile-time is wrong. Besides, there is nothing specific to nedmalloc
about this. So the patch is double-wrong on that account.

The patch has a yet even more immediate problem: t7816.48 is failing in
the CI build for _weeks_ now: it requires that global context to be
initialized, but no code path hits the initialization, resulting in a
very, very ugly:

	BUG: grep.c:516: pcre2_global_context uninitialized

See
https://dev.azure.com/gitgitgadget/git/_build/results?buildId=15151&view=ms.vss-test-web.build-test-results-tab&runId=41282&resultId=101710&paneView=debug
for details.

All of this could be easily avoided. As I had pointed out already, the
obvious fragility is not worth the optimization, and we should just
initialize the global context always, as does this patch
(https://github.com/git-for-windows/git/commit/5e5b959169e6efee73e0b50e464166822b7d2d07).

I don't claim that this is complete, you need to check carefully (for
example, I think you will want to get rid of _all_ the references to
nedmalloc), but this patch is at least a stop-gap measure to fix the CI
build (Junio, would you mind adding this as a SQUASH??? so that this
breakage won't keep the CI build of `pu` in the failing state?):

-- snipsnap --
diff --git a/grep.c b/grep.c
index ec845141bbb..4242ad0b4ae 100644
--- a/grep.c
+++ b/grep.c
@@ -19,7 +19,6 @@ static struct grep_opt grep_defaults;
 #ifdef USE_LIBPCRE2
 static pcre2_general_context *pcre2_global_context;

-#ifdef USE_NED_ALLOCATOR
 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
 {
 	return malloc(size);
@@ -30,7 +29,6 @@ static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
 	return free(pointer);
 }
 #endif
-#endif

 static const char *color_grep_slots[] = {
 	[GREP_COLOR_CONTEXT]	    = "context",
@@ -176,6 +174,12 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 	struct grep_opt *def = &grep_defaults;
 	int i;

+#if defined(USE_LIBPCRE2)
+	if (!pcre2_global_context)
+		pcre2_global_context = pcre2_general_context_create(
+					pcre2_malloc, pcre2_free, NULL);
+#endif
+
 #ifdef USE_NED_ALLOCATOR
 #ifdef USE_LIBPCRE1
 	pcre_malloc = malloc;
@@ -343,11 +347,6 @@ void append_header_grep_pattern(struct grep_opt *opt,
 void append_grep_pattern(struct grep_opt *opt, const char *pat,
 			 const char *origin, int no, enum grep_pat_token t)
 {
-#if defined(USE_LIBPCRE2) && defined(USE_NED_ALLOCATOR)
-	if (!pcre2_global_context && opt->ignore_case && has_non_ascii(pat))
-		pcre2_global_context = pcre2_general_context_create(
-					pcre2_malloc, pcre2_free, NULL);
-#endif
 	append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 }

--
2.23.0.windows.1
Carlo Marcelo Arenas Belón Aug. 27, 2019, 11:51 a.m. UTC | #2
On Tue, Aug 27, 2019 at 2:07 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Unfortunately, this is _still_ incorrect.

I know, and that is why we worked out a v6 RC with Rene than then was
pushed to github[0] and validated to work thanks to your integration
as detailed in [1], I never got to send an update to the list though
because Rene wanted my squashing into his patch to be independent as
detailed there and because I assumed that when Junio didn't pull my
reroll, it was probably because your tree had a fix already anyway.

the recent discovery that xmalloc wasn't thread safe though,
complicates things further and that is why I was expecting to reroll
this on top of both ab/pcre-jit-fixes and jk/drop-release-pack-memory
(later one already in next) as detailed in [2]

> I pointed out multiple times that custom allocators can be activated at
> run-time rather than compile-time, therefore making the choice at
> compile-time is wrong. Besides, there is nothing specific to nedmalloc
> about this. So the patch is double-wrong on that account.

Just to clarify, I think my patch accounts for that (haven't tested
that assumption, but will do now that I have a windows box, probably
even with mi-alloc) but yes, the only reason why there were references
to NEDMALLOC was to isolate the code and make sure the fix was
tackling the problem, it was not my intention to do so at the end,
specially once we agreed that xmalloc should be used anyway.

> The patch has a yet even more immediate problem: t7816.48 is failing in
> the CI build for _weeks_ now: it requires that global context to be
> initialized, but no code path hits the initialization, resulting in a
> very, very ugly:
>
>         BUG: grep.c:516: pcre2_global_context uninitialized
>
> See
> https://dev.azure.com/gitgitgadget/git/_build/results?buildId=15151&view=ms.vss-test-web.build-test-results-tab&runId=41282&resultId=101710&paneView=debug
> for details.

IMHO this is probably testing V3 (from pu) and which was hopefully
fixed in the last github force push for my branch

> All of this could be easily avoided. As I had pointed out already, the
> obvious fragility is not worth the optimization, and we should just
> initialize the global context always, as does this patch
> (https://github.com/git-for-windows/git/commit/5e5b959169e6efee73e0b50e464166822b7d2d07).

ironically only found out about that patch after I got a windows box
(running Windows Home though) and had finished testing my own squashed
fix[3] that has succeeded being validated in GitHub

apologize for the delays, and will be fine using your squash, mine,
the V6 RC (my preference) or dropping this series from pu if that
would help clear the ugliness of pu for windows

hopefully this won't be repeated now that I am aware of github's
integration and have my own (albeit very slow) windows environment as
well.

Carlo

[0] https://github.com/git/git/commit/0ca5d0550c17a68d83b8922b71aeff891958ed0e
[1] https://public-inbox.org/git/CAPUEspiFuvgMQ3W1se1B=8aTTrQsJSZTyQTzG1TpiyN8HTOpkA@mail.gmail.com/
[2] https://public-inbox.org/git/CAPUEspg9F7RutCUCoRAAXmRePjiunq3-zG7cN3uz_t5DVMxP=g@mail.gmail.com/
[3] https://github.com/git/git/pull/627
Junio C Hamano Oct. 3, 2019, 5:02 a.m. UTC | #3
Carlo Arenas <carenas@gmail.com> writes:

> On Tue, Aug 27, 2019 at 2:07 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
>>
>> Unfortunately, this is _still_ incorrect.
> ...
> Just to clarify, I think my patch accounts for that (haven't tested
> that assumption, but will do now that I have a windows box, probably
> even with mi-alloc) but yes, the only reason why there were references
> to NEDMALLOC was to isolate the code and make sure the fix was
> tackling the problem, it was not my intention to do so at the end,
> specially once we agreed that xmalloc should be used anyway.
> ...
> apologize for the delays, and will be fine using your squash, mine,
> the V6 RC (my preference) or dropping this series from pu if that
> would help clear the ugliness of pu for windows

So,... have we seen any conclusion on this?  Can any of you guys
give us a pointer to or copies of the candidate to be the final
solution of this topic, please?

Thanks.
Johannes Schindelin Oct. 3, 2019, 8:08 a.m. UTC | #4
Hi Junio,

On Thu, 3 Oct 2019, Junio C Hamano wrote:

> Carlo Arenas <carenas@gmail.com> writes:
>
> > On Tue, Aug 27, 2019 at 2:07 AM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> >>
> >> Unfortunately, this is _still_ incorrect.
> > ...
> > Just to clarify, I think my patch accounts for that (haven't tested
> > that assumption, but will do now that I have a windows box, probably
> > even with mi-alloc) but yes, the only reason why there were references
> > to NEDMALLOC was to isolate the code and make sure the fix was
> > tackling the problem, it was not my intention to do so at the end,
> > specially once we agreed that xmalloc should be used anyway.
> > ...
> > apologize for the delays, and will be fine using your squash, mine,
> > the V6 RC (my preference) or dropping this series from pu if that
> > would help clear the ugliness of pu for windows
>
> So,... have we seen any conclusion on this?  Can any of you guys
> give us a pointer to or copies of the candidate to be the final
> solution of this topic, please?

I still need
https://github.com/git-for-windows/git/commit/719beb813e4f27f090696ce583df3e5f3c480545
and
https://github.com/git-for-windows/git/commit/3369c322bbd95820b971701fef7db44b26dd826f
to fix that part in Git for Windows' `shears/pu` branch (i.e. the
continuously rebased patch thicket).

Ciao,
Dscho
Carlo Marcelo Arenas Belón Oct. 3, 2019, 11:17 a.m. UTC | #5
On Thu, Oct 3, 2019 at 1:09 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
> I still need
> https://github.com/git-for-windows/git/commit/719beb813e4f27f090696ce583df3e5f3c480545
> and
> https://github.com/git-for-windows/git/commit/3369c322bbd95820b971701fef7db44b26dd826f
> to fix that part in Git for Windows' `shears/pu` branch (i.e. the
> continuously rebased patch thicket).

or we could drop the current branch in pu and start again from scratch
now that all of the required dependencies had been merged.

apologize for the delays otherwise; $DAYJOB has taken a toll lately
and even my new shiny windows dev box hasn't seen much action.

will update here and in github shortly (which might mean up to this
weekend, being realistic), but should be better code (since it is
mostly Rene's) and better tested that way and hopefully won't cause
more breakage (specially in Windows, sorry Dscho)

Carlo
Johannes Schindelin Oct. 3, 2019, 6:23 p.m. UTC | #6
Hi Carlo,

On Thu, 3 Oct 2019, Carlo Arenas wrote:

> On Thu, Oct 3, 2019 at 1:09 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> > I still need
> > https://github.com/git-for-windows/git/commit/719beb813e4f27f090696ce583df3e5f3c480545
> > and
> > https://github.com/git-for-windows/git/commit/3369c322bbd95820b971701fef7db44b26dd826f
> > to fix that part in Git for Windows' `shears/pu` branch (i.e. the
> > continuously rebased patch thicket).
>
> or we could drop the current branch in pu and start again from scratch
> now that all of the required dependencies had been merged.

I hope that your next iteration won't have any `#ifdef NED` in it?

>
> apologize for the delays otherwise; $DAYJOB has taken a toll lately
> and even my new shiny windows dev box hasn't seen much action.
>
> will update here and in github shortly (which might mean up to this
> weekend, being realistic), but should be better code (since it is
> mostly Rene's) and better tested that way and hopefully won't cause
> more breakage (specially in Windows, sorry Dscho)

I appreciate all your work!

Thanks,
Dscho
Junio C Hamano Oct. 3, 2019, 10:57 p.m. UTC | #7
Carlo Arenas <carenas@gmail.com> writes:

> or we could drop the current branch in pu and start again from scratch
> now that all of the required dependencies had been merged.

That would be the cleanest from my point of view.  Thanks, both.
diff mbox series

Patch

diff --git a/builtin/grep.c b/builtin/grep.c
index 560051784e..e49c20df60 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1145,5 +1145,6 @@  int cmd_grep(int argc, const char **argv, const char *prefix)
 		run_pager(&opt, prefix);
 	clear_pathspec(&pathspec);
 	free_grep_patterns(&opt);
+	grep_destroy();
 	return !hit;
 }
diff --git a/grep.c b/grep.c
index 0154998695..8e0b838db0 100644
--- a/grep.c
+++ b/grep.c
@@ -16,6 +16,44 @@  static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+#ifdef USE_NED_ALLOCATOR
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
+{
+	return xmalloc(size); /* will use nedalloc underneath */
+}
+
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
+{
+	return free(pointer);
+}
+
+/*
+ * BUG: this is technically not needed if we will do UTF matching
+ *      but UTF locale detection is currently broken
+ * TODO: has_non_ascii() doesn't support NUL in pattern
+ */
+void setup_pcre2_as_needed(struct grep_opt *opt, const char *pat)
+{
+	if (!pcre2_global_context && opt->ignore_case &&
+		has_non_ascii(pat))
+		pcre2_global_context = pcre2_general_context_create(
+					pcre2_malloc, pcre2_free, NULL);
+}
+
+static void cleanup_pcre2_as_needed(void)
+{
+	pcre2_general_context_free(pcre2_global_context);
+}
+
+#else
+#define setup_pcre2_as_needed(opt, pat)
+#define cleanup_pcre2_as_needed()
+#endif
+#endif
+
 static const char *color_grep_slots[] = {
 	[GREP_COLOR_CONTEXT]	    = "context",
 	[GREP_COLOR_FILENAME]	    = "filename",
@@ -153,6 +191,7 @@  int grep_config(const char *var, const char *value, void *cb)
  *
  * If using PCRE make sure that the library is configured
  * to use the right allocator (ex: NED)
+ * if any object is created it should be cleaned up in grep_destroy()
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
 {
@@ -188,6 +227,11 @@  void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 		color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+	cleanup_pcre2_as_needed();
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 {
 	/*
@@ -326,6 +370,7 @@  void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 		     const char *origin, int no, enum grep_pat_token t)
 {
 	struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
+	setup_pcre2_as_needed(opt, pat);
 	do_append_grep_pat(&opt->pattern_tail, p);
 }
 
@@ -507,9 +552,18 @@  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 
 	p->pcre2_compile_context = NULL;
 
+	/*
+	 * pcre2_global_context is initialized in append_grep_pat()
+	 * with logic from setup_pcre2_as_needed() that mimics what
+	 * is used here and with the BUG() to protect from mismatches
+	 */
 	if (opt->ignore_case) {
 		if (has_non_ascii(p->pattern)) {
-			character_tables = pcre2_maketables(NULL);
+#ifdef USE_NED_ALLOCATOR
+			if (!pcre2_global_context)
+				BUG("pcre2_global_context uninitialized");
+#endif
+			character_tables = pcre2_maketables(pcre2_global_context);
 			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 		}
diff --git a/grep.h b/grep.h
index 1875880f37..526c2db9ef 100644
--- a/grep.h
+++ b/grep.h
@@ -189,6 +189,7 @@  struct grep_opt {
 void init_grep_defaults(struct repository *);
 int grep_config(const char *var, const char *value, void *);
 void grep_init(struct grep_opt *, struct repository *repo, const char *prefix);
+void grep_destroy(void);
 void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt);
 
 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);