diff mbox series

[v2,09/10] grep/pcre2: move back to thread-only PCREv2 structures

Message ID 20210218000728.13995-10-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Ævar Arnfjörð Bjarmason Feb. 18, 2021, 12:07 a.m. UTC
Change the setup of the "pcre2_general_context" to happen per-thread
in compile_pcre2_pattern() instead of in grep_init().

This change brings it in line with how the rest of the pcre2_* members
in the grep_pat structure are set up.

As noted in the preceding commit the approach 513f2b0bbd4 (grep: make
PCRE2 aware of custom allocator, 2019-10-16) took to allocate the
pcre2_general_context seems to have been initially based on a
misunderstanding of how PCREv2 memory allocation works.

The approach of creating a global context in grep_init() is just added
complexity for almost zero gain. On my system it's 24 bytes saved
per-thread. For comparison PCREv2 will then go on to allocate at least
a kilobyte for its own thread-local state.

As noted in 6d423dd542f (grep: don't redundantly compile throwaway
patterns under threading, 2017-05-25) the grep code is intentionally
not trying to micro-optimize allocations by e.g. sharing some PCREv2
structures globally, while making others thread-local.

So let's remove this special case and make all of them thread-local
again for simplicity. With this change we could move the
pcre2_{malloc,free} functions around to live closer to their current
use. I'm not doing that here to keep this change small, that cleanup
will be done in a follow-up commit.

See also the discussion in 94da9193a6 (grep: add support for PCRE v2,
2017-06-01) about thread safety, and Johannes's comments[1] to the
effect that we should be doing what this patch is doing.

1. https://lore.kernel.org/git/nycvar.QRO.7.76.6.1908052120302.46@tvgsbejvaqbjf.bet/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/grep.c |  1 -
 grep.c         | 41 +++++++++++++++--------------------------
 grep.h         |  3 ++-
 3 files changed, 17 insertions(+), 28 deletions(-)

Comments

Junio C Hamano March 4, 2021, 12:27 a.m. UTC | #1
Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> So let's remove this special case and make all of them thread-local
> again for simplicity. With this change we could move the
> pcre2_{malloc,free} functions around to live closer to their current
> use. I'm not doing that here to keep this change small, that cleanup
> will be done in a follow-up commit.

Nice.
diff mbox series

Patch

diff --git a/builtin/grep.c b/builtin/grep.c
index 55d06c95130..c69fe993406 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1175,6 +1175,5 @@  int cmd_grep(int argc, const char **argv, const char *prefix)
 		run_pager(&opt, prefix);
 	clear_pathspec(&pathspec);
 	free_grep_patterns(&opt);
-	grep_destroy();
 	return !hit;
 }
diff --git a/grep.c b/grep.c
index 0116ff5f09f..2599f329cd6 100644
--- a/grep.c
+++ b/grep.c
@@ -41,7 +41,6 @@  static struct grep_opt grep_defaults = {
 };
 
 #ifdef USE_LIBPCRE2
-static pcre2_general_context *pcre2_global_context;
 #define GREP_PCRE2_DEBUG_MALLOC 0
 
 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
@@ -163,20 +162,9 @@  int grep_config(const char *var, const char *value, void *cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
- *
- * If using PCRE, make sure that the library is configured
- * to use the same allocator as Git (e.g. nedmalloc on Windows).
- *
- * Any allocated memory needs to be released in grep_destroy().
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
 {
-#if defined(USE_LIBPCRE2)
-	if (!pcre2_global_context)
-		pcre2_global_context = pcre2_general_context_create(
-					pcre2_malloc, pcre2_free, NULL);
-#endif
-
 	*opt = grep_defaults;
 
 	opt->repo = repo;
@@ -186,13 +174,6 @@  void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 	opt->header_tail = &opt->header_list;
 }
 
-void grep_destroy(void)
-{
-#ifdef USE_LIBPCRE2
-	pcre2_general_context_free(pcre2_global_context);
-#endif
-}
-
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 {
 	/*
@@ -384,13 +365,20 @@  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	int patinforet;
 	size_t jitsizearg;
 
-	/* pcre2_global_context is initialized in grep_init */
+	/*
+	 * Call pcre2_general_context_create() before calling any
+	 * other pcre2_*(). It sets up our malloc()/free() functions
+	 * with which everything else is allocated.
+	 */
+	p->pcre2_general_context = pcre2_general_context_create(
+		pcre2_malloc, pcre2_free, NULL);
+	if (!p->pcre2_general_context)
+		die("Couldn't allocate PCRE2 general context");
+
 	if (opt->ignore_case) {
 		if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
-			if (!pcre2_global_context)
-				BUG("pcre2_global_context uninitialized");
-			p->pcre2_tables = pcre2_maketables(pcre2_global_context);
-			p->pcre2_compile_context = pcre2_compile_context_create(pcre2_global_context);
+			p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
+			p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
 			pcre2_set_character_tables(p->pcre2_compile_context,
 							p->pcre2_tables);
 		}
@@ -411,7 +399,7 @@  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 					 p->pcre2_compile_context);
 
 	if (p->pcre2_pattern) {
-		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, pcre2_global_context);
+		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
 		if (!p->pcre2_match_data)
 			die("Couldn't allocate PCRE2 match data");
 	} else {
@@ -491,10 +479,11 @@  static void free_pcre2_pattern(struct grep_pat *p)
 	pcre2_code_free(p->pcre2_pattern);
 	pcre2_match_data_free(p->pcre2_match_data);
 #ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
-	pcre2_maketables_free(pcre2_global_context, p->pcre2_tables);
+	pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
 #else
 	free((void *)p->pcre2_tables);
 #endif
+	pcre2_general_context_free(p->pcre2_general_context);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
diff --git a/grep.h b/grep.h
index 64666e92049..72f82b1e302 100644
--- a/grep.h
+++ b/grep.h
@@ -14,6 +14,7 @@ 
 typedef int pcre2_code;
 typedef int pcre2_match_data;
 typedef int pcre2_compile_context;
+typedef int pcre2_general_context;
 #endif
 #ifndef PCRE2_MATCH_INVALID_UTF
 /* PCRE2_MATCH_* dummy also with !USE_LIBPCRE2, for test-pcre2-config.c */
@@ -75,6 +76,7 @@  struct grep_pat {
 	pcre2_code *pcre2_pattern;
 	pcre2_match_data *pcre2_match_data;
 	pcre2_compile_context *pcre2_compile_context;
+	pcre2_general_context *pcre2_general_context;
 	const uint8_t *pcre2_tables;
 	uint32_t pcre2_jit_on;
 	unsigned fixed:1;
@@ -167,7 +169,6 @@  struct grep_opt {
 
 int grep_config(const char *var, const char *value, void *);
 void grep_init(struct grep_opt *, struct repository *repo, const char *prefix);
-void grep_destroy(void);
 void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt);
 
 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);