diff mbox series

[v2,RESEND] libselinux: Workaround for heap overhead of pcre

Message ID 20230112091408.2880781-1-inseob@google.com (mailing list archive)
State Accepted, archived
Delegated to: Jason Zaman
Headers show
Series [v2,RESEND] libselinux: Workaround for heap overhead of pcre | expand

Commit Message

Inseob Kim Jan. 12, 2023, 9:14 a.m. UTC
pcre's behavior is changed so that pcre2_match always allocates heap for
match_data, rather than stack, regardless of size. The heap isn't freed
until explicitly calling pcre2_match_data_free. This new behavior may
result in heap overhead, which may increase the peak memory usage about
a few megabytes. It's because regex_match is first called for regex_data
objects, and then regex_data objects are freed at once.

To workaround it, free match_data as soon as we call regex_match. It's
fine because libselinux currently doesn't use match_data, but use only
the return value.

Signed-off-by: Inseob Kim <inseob@google.com>

---
v2:
  - add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro
  - remove match_data from struct regex_data
---
 libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

Comments

Jason Zaman Jan. 15, 2023, 10:17 p.m. UTC | #1
On Thu, Jan 12, 2023 at 06:14:09PM +0900, Inseob Kim wrote:
> pcre's behavior is changed so that pcre2_match always allocates heap for
> match_data, rather than stack, regardless of size. The heap isn't freed
> until explicitly calling pcre2_match_data_free. This new behavior may
> result in heap overhead, which may increase the peak memory usage about
> a few megabytes. It's because regex_match is first called for regex_data
> objects, and then regex_data objects are freed at once.
> 
> To workaround it, free match_data as soon as we call regex_match. It's
> fine because libselinux currently doesn't use match_data, but use only
> the return value.
> 
> Signed-off-by: Inseob Kim <inseob@google.com>

Acked-by: Jason Zaman <jason@perfinion.com>

Merged,
Thanks!

> 
> ---
> v2:
>   - add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro
>   - remove match_data from struct regex_data
> ---
>  libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++-
>  1 file changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> index 149a7973..4b4b9f08 100644
> --- a/libselinux/src/regex.c
> +++ b/libselinux/src/regex.c
> @@ -60,11 +60,13 @@ char const *regex_arch_string(void)
>  
>  struct regex_data {
>  	pcre2_code *regex; /* compiled regular expression */
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
>  	/*
>  	 * match data block required for the compiled
>  	 * pattern in pcre2
>  	 */
>  	pcre2_match_data *match_data;
> +#endif
>  	pthread_mutex_t match_mutex;
>  };
>  
> @@ -84,11 +86,13 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
>  		goto err;
>  	}
>  
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
>  	(*regex)->match_data =
>  	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>  	if (!(*regex)->match_data) {
>  		goto err;
>  	}
> +#endif
>  	return 0;
>  
>  err:
> @@ -138,10 +142,12 @@ int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
>  		if (rc != 1)
>  			goto err;
>  
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
>  		(*regex)->match_data =
>  		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>  		if (!(*regex)->match_data)
>  			goto err;
> +#endif
>  
>  		*regex_compiled = true;
>  	}
> @@ -203,8 +209,12 @@ void regex_data_free(struct regex_data *regex)
>  	if (regex) {
>  		if (regex->regex)
>  			pcre2_code_free(regex->regex);
> +
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
>  		if (regex->match_data)
>  			pcre2_match_data_free(regex->match_data);
> +#endif
> +
>  		__pthread_mutex_destroy(&regex->match_mutex);
>  		free(regex);
>  	}
> @@ -213,10 +223,30 @@ void regex_data_free(struct regex_data *regex)
>  int regex_match(struct regex_data *regex, char const *subject, int partial)
>  {
>  	int rc;
> +	pcre2_match_data *match_data;
>  	__pthread_mutex_lock(&regex->match_mutex);
> +
> +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> +	match_data = pcre2_match_data_create_from_pattern(
> +	    regex->regex, NULL);
> +	if (match_data == NULL) {
> +		__pthread_mutex_unlock(&regex->match_mutex);
> +		return REGEX_ERROR;
> +	}
> +#else
> +	match_data = regex->match_data;
> +#endif
> +
>  	rc = pcre2_match(
>  	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> -	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
> +	    partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
> +
> +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> +	// pcre2_match allocates heap and it won't be freed until
> +	// pcre2_match_data_free, resulting in heap overhead.
> +	pcre2_match_data_free(match_data);
> +#endif
> +
>  	__pthread_mutex_unlock(&regex->match_mutex);
>  	if (rc > 0)
>  		return REGEX_MATCH;
> -- 
> 2.39.0.314.g84b9a713c41-goog
>
diff mbox series

Patch

diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 149a7973..4b4b9f08 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -60,11 +60,13 @@  char const *regex_arch_string(void)
 
 struct regex_data {
 	pcre2_code *regex; /* compiled regular expression */
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
 	/*
 	 * match data block required for the compiled
 	 * pattern in pcre2
 	 */
 	pcre2_match_data *match_data;
+#endif
 	pthread_mutex_t match_mutex;
 };
 
@@ -84,11 +86,13 @@  int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
 		goto err;
 	}
 
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
 	(*regex)->match_data =
 	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
 	if (!(*regex)->match_data) {
 		goto err;
 	}
+#endif
 	return 0;
 
 err:
@@ -138,10 +142,12 @@  int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
 		if (rc != 1)
 			goto err;
 
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
 		(*regex)->match_data =
 		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
 		if (!(*regex)->match_data)
 			goto err;
+#endif
 
 		*regex_compiled = true;
 	}
@@ -203,8 +209,12 @@  void regex_data_free(struct regex_data *regex)
 	if (regex) {
 		if (regex->regex)
 			pcre2_code_free(regex->regex);
+
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
 		if (regex->match_data)
 			pcre2_match_data_free(regex->match_data);
+#endif
+
 		__pthread_mutex_destroy(&regex->match_mutex);
 		free(regex);
 	}
@@ -213,10 +223,30 @@  void regex_data_free(struct regex_data *regex)
 int regex_match(struct regex_data *regex, char const *subject, int partial)
 {
 	int rc;
+	pcre2_match_data *match_data;
 	__pthread_mutex_lock(&regex->match_mutex);
+
+#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
+	match_data = pcre2_match_data_create_from_pattern(
+	    regex->regex, NULL);
+	if (match_data == NULL) {
+		__pthread_mutex_unlock(&regex->match_mutex);
+		return REGEX_ERROR;
+	}
+#else
+	match_data = regex->match_data;
+#endif
+
 	rc = pcre2_match(
 	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
-	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
+	    partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
+
+#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
+	// pcre2_match allocates heap and it won't be freed until
+	// pcre2_match_data_free, resulting in heap overhead.
+	pcre2_match_data_free(match_data);
+#endif
+
 	__pthread_mutex_unlock(&regex->match_mutex);
 	if (rc > 0)
 		return REGEX_MATCH;