diff mbox series

[v3,3/3] libselinux: performance optimization for duplicate detection

Message ID 20230308095308.60661-4-wanghuizhao1@huawei.com (mailing list archive)
State Accepted
Commit 2c7b71db5689
Delegated to: Petr Lautrbach
Headers show
Series Improve efficiency of detecting duplicate in libselinux | expand

Commit Message

Huizhao Wang March 8, 2023, 9:53 a.m. UTC
When semodule -i some.pp to install a module package, duplicate items are
detected for the module. The detection function is nodups_specs in
libselinux/src/label_file.c. The algorithm complexity of implementing
this function is O(M*(N^2)). M is a symbol related to the length of a string.
N indicates the number of data->nspec. In scenarios where N is very large, the
efficiency is very low.

To solve this problem, I propose to use the hash table to detect duplicates.
The algorithm complexity of new implementing is O(M*N). The execution
efficiency will be greatly improved.

Comparison between the execution time of the nodups_specs function.

Old double-layer loop implementation O(M*(N^2)):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.242s
nodups_specs end:   11785.588s
nodups_specs consumes:  0.346s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11804.280s
nodups_specs end:   11806.546s
nodups_specs consumes:  2.266s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11819.106s
nodups_specs end:   11830.892s
nodups_specs consumes: 11.786s

New hash table implementation O(M*N):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.588s
nodups_specs end:   11785.590s
nodups_specs consumes:  0.002s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11806.546s
nodups_specs end:   11806.552s
nodups_specs consumes:  0.006s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11830.892s
nodups_specs end:   11830.905s
nodups_specs consumes:  0.013s

Signed-off-by: wanghuizhao <wanghuizhao1@huawei.com>
---
 libselinux/src/label_file.c | 120 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 96 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 74ae9b9f..d489c1f7 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -19,10 +19,19 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include "hashtab.h"
 #include "callbacks.h"
 #include "label_internal.h"
 #include "label_file.h"
 
+/* controls the shrink multiple of the hashtab length */
+#define SHRINK_MULTIS 1
+
+struct chkdups_key {
+	char *regex;
+	unsigned int mode;
+};
+
 /*
  * Internals, mostly moved over from matchpathcon.c
  */
@@ -57,40 +66,103 @@  static int find_stem_from_file(struct saved_data *data, const char *key)
 }
 
 /*
+ * hash calculation and key comparison of hash table
+ */
+
+static unsigned int symhash(hashtab_t h, const_hashtab_key_t key)
+{
+	const struct chkdups_key *k = (const struct chkdups_key *)key;
+	const char *p = NULL;
+	size_t size;
+	unsigned int val = 0;
+
+	size = strlen(k->regex);
+	for (p = k->regex; ((size_t) (p - k->regex)) < size; p++)
+		val =
+			((val << 4) | (val >> (8 * sizeof(unsigned int) - 4))) ^ (*p);
+	return val % h->size;
+}
+
+static int symcmp(hashtab_t h
+		  __attribute__ ((unused)), const_hashtab_key_t key1,
+		  const_hashtab_key_t key2)
+{
+	const struct chkdups_key *a = (const struct chkdups_key *)key1;
+	const struct chkdups_key *b = (const struct chkdups_key *)key2;
+
+	return strcmp(a->regex, b->regex) || (a->mode && b->mode && a->mode != b->mode);
+}
+
+static int destroy_chkdups_key(hashtab_key_t key)
+{
+	free(key);
+
+	return 0;
+}
+
+/*
  * Warn about duplicate specifications.
  */
 static int nodups_specs(struct saved_data *data, const char *path)
 {
-	int rc = 0;
-	unsigned int ii, jj;
+	int rc = 0, ret = 0;
+	unsigned int ii;
 	struct spec *curr_spec, *spec_arr = data->spec_arr;
+	struct chkdups_key *new = NULL;
+	unsigned int hashtab_len = (data->nspec / SHRINK_MULTIS) ? data->nspec / SHRINK_MULTIS : 1;
 
+	hashtab_t hash_table = hashtab_create(symhash, symcmp, hashtab_len);
+	if (!hash_table) {
+		rc = -1;
+		COMPAT_LOG(SELINUX_ERROR, "%s: hashtab create failed.\n", path);
+		return rc;
+	}
 	for (ii = 0; ii < data->nspec; ii++) {
-		curr_spec = &spec_arr[ii];
-		for (jj = ii + 1; jj < data->nspec; jj++) {
-			if ((!strcmp(spec_arr[jj].regex_str,
-				curr_spec->regex_str))
-			    && (!spec_arr[jj].mode || !curr_spec->mode
-				|| spec_arr[jj].mode == curr_spec->mode)) {
-				rc = -1;
-				errno = EINVAL;
-				if (strcmp(spec_arr[jj].lr.ctx_raw,
-					    curr_spec->lr.ctx_raw)) {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple different specifications for %s  (%s and %s).\n",
-						 path, curr_spec->regex_str,
-						 spec_arr[jj].lr.ctx_raw,
-						 curr_spec->lr.ctx_raw);
-				} else {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple same specifications for %s.\n",
-						 path, curr_spec->regex_str);
-				}
+		new = (struct chkdups_key *)malloc(sizeof(struct chkdups_key));
+		if (!new) {
+			rc = -1;
+			hashtab_destroy_key(hash_table, destroy_chkdups_key);
+			COMPAT_LOG(SELINUX_ERROR, "%s: hashtab key create failed.\n", path);
+			return rc;
+		}
+		new->regex = spec_arr[ii].regex_str;
+		new->mode = spec_arr[ii].mode;
+		ret = hashtab_insert(hash_table, (hashtab_key_t)new, &spec_arr[ii]);
+		if (ret == HASHTAB_SUCCESS)
+			continue;
+		if (ret == HASHTAB_PRESENT) {
+			curr_spec =
+				(struct spec *)hashtab_search(hash_table, (hashtab_key_t)new);
+			rc = -1;
+			errno = EINVAL;
+			free(new);
+			if (strcmp(spec_arr[ii].lr.ctx_raw, curr_spec->lr.ctx_raw)) {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple different specifications for %s  (%s and %s).\n",
+					 path, curr_spec->regex_str,
+					 spec_arr[ii].lr.ctx_raw,
+					 curr_spec->lr.ctx_raw);
+			} else {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple same specifications for %s.\n",
+					 path, curr_spec->regex_str);
 			}
 		}
+		if (ret == HASHTAB_OVERFLOW) {
+			rc = -1;
+			free(new);
+			COMPAT_LOG
+				(SELINUX_ERROR,
+				"%s: hashtab happen memory error.\n",
+				path);
+			break;
+		}
 	}
+
+	hashtab_destroy_key(hash_table, destroy_chkdups_key);
+
 	return rc;
 }