From patchwork Thu Feb 9 11:42:52 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Huizhao Wang X-Patchwork-Id: 13134442 X-Patchwork-Delegate: plautrba@redhat.com Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 955E9C05027 for ; Thu, 9 Feb 2023 11:54:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229737AbjBILy5 (ORCPT ); Thu, 9 Feb 2023 06:54:57 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58486 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229539AbjBILy1 (ORCPT ); Thu, 9 Feb 2023 06:54:27 -0500 Received: from szxga08-in.huawei.com (szxga08-in.huawei.com [45.249.212.255]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9F4D3E3BC for ; Thu, 9 Feb 2023 03:43:02 -0800 (PST) Received: from kwepemi500007.china.huawei.com (unknown [172.30.72.55]) by szxga08-in.huawei.com (SkyGuard) with ESMTP id 4PCFL73MT5z16NX0 for ; Thu, 9 Feb 2023 19:40:47 +0800 (CST) Received: from Linux-SUSE12SP5.huawei.com (10.67.136.158) by kwepemi500007.china.huawei.com (7.221.188.207) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2375.34; Thu, 9 Feb 2023 19:42:59 +0800 From: wanghuizhao To: , , , , , CC: , , , , , , , , , , , , , Subject: [PATCH 1/2] libselinux: migrating hashtab from policycoreutils Date: Thu, 9 Feb 2023 19:42:52 +0800 Message-ID: <20230209114253.120485-2-wanghuizhao1@huawei.com> X-Mailer: git-send-email 2.12.3 In-Reply-To: <20230209114253.120485-1-wanghuizhao1@huawei.com> References: <20230209114253.120485-1-wanghuizhao1@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.136.158] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To 
kwepemi500007.china.huawei.com (7.221.188.207) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: selinux@vger.kernel.org

To use hashtab in libselinux, migrate the existing hashtab template
from policycoreutils/newrole to libselinux.

Signed-off-by: wanghuizhao
---
 libselinux/src/hashtab.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++
 libselinux/src/hashtab.h | 115 ++++++++++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 libselinux/src/hashtab.c
 create mode 100644 libselinux/src/hashtab.h

diff --git a/libselinux/src/hashtab.c b/libselinux/src/hashtab.c
new file mode 100644
index 00000000..26d4f4c7
--- /dev/null
+++ b/libselinux/src/hashtab.c
@@ -0,0 +1,211 @@
+
+/* Author : Stephen Smalley, */
+
+/* FLASK */
+
+/*
+ * Implementation of the hash table type.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hashtab.h"
+
+hashtab_t hashtab_create(unsigned int (*hash_value) (hashtab_t h,
+						     const_hashtab_key_t key),
+			 int (*keycmp) (hashtab_t h,
+					const_hashtab_key_t key1,
+					const_hashtab_key_t key2),
+			 unsigned int size)
+{
+	hashtab_t p;
+
+	/* calloc() zero-fills the table header, so nel starts at 0. */
+	p = calloc(1, sizeof(*p));
+	if (p == NULL)
+		return NULL;
+
+	p->size = size;
+	p->hash_value = hash_value;
+	p->keycmp = keycmp;
+
+	/* calloc() also guards the size multiplication against overflow. */
+	p->htable = calloc(size, sizeof(*p->htable));
+	if (p->htable == NULL) {
+		free(p);
+		return NULL;
+	}
+
+	return p;
+}
+
+int hashtab_insert(hashtab_t h, hashtab_key_t key, hashtab_datum_t datum)
+{
+	unsigned int hvalue;
+	hashtab_ptr_t prev, cur, newnode;
+
+	if (!h)
+		return HASHTAB_OVERFLOW;
+
+	/* Chains are kept ordered by keycmp(); stop at the first key >= ours. */
+	hvalue = h->hash_value(h, key);
+	prev = NULL;
+	cur = h->htable[hvalue];
+	while (cur && h->keycmp(h, key, cur->key) > 0) {
+		prev = cur;
+		cur = cur->next;
+	}
+
+	if (cur && (h->keycmp(h, key, cur->key) == 0))
+		return HASHTAB_PRESENT;
+
+	newnode = calloc(1, sizeof(*newnode));
+	if (newnode == NULL)
+		return HASHTAB_OVERFLOW;
+	newnode->key = key;
+	newnode->datum = datum;
+	if (prev) {
+		newnode->next = prev->next;
+		prev->next = newnode;
+	} else {
+		newnode->next = h->htable[hvalue];
+		h->htable[hvalue] = newnode;
+	}
+
+	h->nel++;
+	return HASHTAB_SUCCESS;
+}
+
+int hashtab_remove(hashtab_t h, hashtab_key_t key,
+		   void (*destroy) (hashtab_key_t k,
+				    hashtab_datum_t d, void *args), void *args)
+{
+	unsigned int hvalue;
+	hashtab_ptr_t cur, last;
+
+	if (!h)
+		return HASHTAB_MISSING;
+
+	hvalue = h->hash_value(h, key);
+	last = NULL;
+	cur = h->htable[hvalue];
+	while (cur != NULL && h->keycmp(h, key, cur->key) > 0) {
+		last = cur;
+		cur = cur->next;
+	}
+
+	if (cur == NULL || (h->keycmp(h, key, cur->key) != 0))
+		return HASHTAB_MISSING;
+
+	if (last == NULL)
+		h->htable[hvalue] = cur->next;
+	else
+		last->next = cur->next;
+
+	if (destroy)
+		destroy(cur->key, cur->datum, args);
+	free(cur);
+	h->nel--;
+	return HASHTAB_SUCCESS;
+}
+
+hashtab_datum_t hashtab_search(hashtab_t h, const_hashtab_key_t key)
+{
+	unsigned int hvalue;
+	hashtab_ptr_t cur;
+
+	if (!h)
+		return NULL;
+
+	hvalue = h->hash_value(h, key);
+	cur = h->htable[hvalue];
+	while (cur != NULL && h->keycmp(h, key, cur->key) > 0)
+		cur = cur->next;
+
+	if (cur == NULL || (h->keycmp(h, key, cur->key) != 0))
+		return NULL;
+
+	return cur->datum;
+}
+
+void hashtab_destroy(hashtab_t h)
+{
+	unsigned int i;
+	hashtab_ptr_t cur, temp;
+
+	if (!h)
+		return;
+
+	/* Only the nodes are released; keys and datums are not freed here. */
+	for (i = 0; i < h->size; i++) {
+		cur = h->htable[i];
+		while (cur != NULL) {
+			temp = cur;
+			cur = cur->next;
+			free(temp);
+		}
+		h->htable[i] = NULL;
+	}
+
+	free(h->htable);
+	h->htable = NULL;
+
+	free(h);
+}
+
+int hashtab_map(hashtab_t h,
+		int (*apply) (hashtab_key_t k,
+			      hashtab_datum_t d, void *args), void *args)
+{
+	unsigned int i;
+	hashtab_ptr_t cur;
+	int ret;
+
+	if (!h)
+		return HASHTAB_SUCCESS;
+
+	for (i = 0; i < h->size; i++) {
+		cur = h->htable[i];
+		while (cur != NULL) {
+			ret = apply(cur->key, cur->datum, args);
+			if (ret)
+				return ret;
+			cur = cur->next;
+		}
+	}
+	return HASHTAB_SUCCESS;
+}
+
+void hashtab_hash_eval(hashtab_t h, char *tag)
+{
+	unsigned int i;
+	unsigned int chain_len, slots_used, max_chain_len;
+	hashtab_ptr_t cur;
+
+	/* Match the other entry points: a NULL table is silently ignored. */
+	if (!h)
+		return;
+
+	slots_used = 0;
+	max_chain_len = 0;
+	for (i = 0; i < h->size; i++) {
+		cur = h->htable[i];
+		if (cur) {
+			slots_used++;
+			chain_len = 0;
+			while (cur) {
+				chain_len++;
+				cur = cur->next;
+			}
+
+			if (chain_len > max_chain_len)
+				max_chain_len = chain_len;
+		}
+	}
+
+	/* All counters are unsigned, so print them with %u. */
+	printf
+	    ("%s: %u entries and %u/%u buckets used, longest chain length %u\n",
+	     tag, (unsigned int)h->nel, slots_used, h->size, max_chain_len);
+}
diff --git a/libselinux/src/hashtab.h b/libselinux/src/hashtab.h
new file mode 100644
index 00000000..092b96a9
--- /dev/null
+++ b/libselinux/src/hashtab.h
@@ -0,0 +1,115 @@
+
+/* Author : Stephen Smalley, */
+
+/* FLASK */
+
+/*
+ * A hash table (hashtab) maintains associations between
+ * key values and datum values. The type of the key values
+ * and the type of the datum values is arbitrary. The
+ * functions for hash computation and key comparison are
+ * provided by the creator of the table.
+ */
+
+#ifndef _NEWROLE_HASHTAB_H_
+#define _NEWROLE_HASHTAB_H_
+
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+
+typedef char *hashtab_key_t;	/* generic key type */
+typedef const char *const_hashtab_key_t;	/* constant generic key type */
+typedef void *hashtab_datum_t;	/* generic datum type */
+
+typedef struct hashtab_node *hashtab_ptr_t;
+
+typedef struct hashtab_node {
+	hashtab_key_t key;
+	hashtab_datum_t datum;
+	hashtab_ptr_t next;
+} hashtab_node_t;
+
+typedef struct hashtab_val {
+	hashtab_ptr_t *htable;	/* hash table */
+	unsigned int size;	/* number of slots in hash table */
+	uint32_t nel;		/* number of elements in hash table */
+	unsigned int (*hash_value) (struct hashtab_val * h, const_hashtab_key_t key);	/* hash function */
+	int (*keycmp) (struct hashtab_val * h, const_hashtab_key_t key1, const_hashtab_key_t key2);	/* key comparison function */
+} hashtab_val_t;
+
+typedef hashtab_val_t *hashtab_t;
+
+/* Define status codes for hash table functions */
+#define HASHTAB_SUCCESS 0
+#define HASHTAB_OVERFLOW -ENOMEM
+#define HASHTAB_PRESENT -EEXIST
+#define HASHTAB_MISSING -ENOENT
+
+/*
+   Creates a new hash table with the specified characteristics.
+
+   Returns NULL if insufficient space is available or
+   the new hash table otherwise.
+ */
+extern hashtab_t hashtab_create(unsigned int (*hash_value) (hashtab_t h,
+							    const_hashtab_key_t
+							    key),
+				int (*keycmp) (hashtab_t h,
+					       const_hashtab_key_t key1,
+					       const_hashtab_key_t key2),
+				unsigned int size);
+/*
+   Inserts the specified (key, datum) pair into the specified hash table.
+
+   Returns HASHTAB_OVERFLOW if insufficient space is available or
+   HASHTAB_PRESENT if there is already an entry with the same key or
+   HASHTAB_SUCCESS otherwise.
+ */
+extern int hashtab_insert(hashtab_t h, hashtab_key_t k, hashtab_datum_t d);
+
+/*
+   Removes the entry with the specified key from the hash table.
+   Applies the specified destroy function to (key,datum,args) for
+   the entry.
+
+   Returns HASHTAB_MISSING if no entry has the specified key or
+   HASHTAB_SUCCESS otherwise.
+ */
+extern int hashtab_remove(hashtab_t h, hashtab_key_t k,
+			  void (*destroy) (hashtab_key_t k,
+					   hashtab_datum_t d,
+					   void *args), void *args);
+
+/*
+   Searches for the entry with the specified key in the hash table.
+
+   Returns NULL if no entry has the specified key or
+   the datum of the entry otherwise.
+ */
+extern hashtab_datum_t hashtab_search(hashtab_t h, const_hashtab_key_t k);
+
+/*
+   Destroys the specified hash table.
+ */
+extern void hashtab_destroy(hashtab_t h);
+
+/*
+   Applies the specified apply function to (key,datum,args)
+   for each entry in the specified hash table.
+
+   The order in which the function is applied to the entries
+   is dependent upon the internal structure of the hash table.
+
+   If apply returns a non-zero status, then hashtab_map will cease
+   iterating through the hash table and will propagate the error
+   return to its caller.
+ */
+extern int hashtab_map(hashtab_t h,
+		       int (*apply) (hashtab_key_t k,
+				     hashtab_datum_t d,
+				     void *args), void *args);
+
+extern void hashtab_hash_eval(hashtab_t h, char *tag);
+
+#endif

From patchwork Thu Feb 9 11:42:53 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Huizhao Wang X-Patchwork-Id: 13134443 X-Patchwork-Delegate: plautrba@redhat.com Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0041DC636D7 for ; Thu, 9 Feb 2023 11:54:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229755AbjBILy6 (ORCPT ); Thu, 9 Feb 2023 06:54:58 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58488 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229567AbjBILy1 (ORCPT ); Thu, 9 Feb
2023 06:54:27 -0500 Received: from szxga02-in.huawei.com (szxga02-in.huawei.com [45.249.212.188]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5B148EC6B for ; Thu, 9 Feb 2023 03:43:03 -0800 (PST) Received: from kwepemi500007.china.huawei.com (unknown [172.30.72.57]) by szxga02-in.huawei.com (SkyGuard) with ESMTP id 4PCFKv6fjfzRrxr for ; Thu, 9 Feb 2023 19:40:35 +0800 (CST) Received: from Linux-SUSE12SP5.huawei.com (10.67.136.158) by kwepemi500007.china.huawei.com (7.221.188.207) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2375.34; Thu, 9 Feb 2023 19:43:00 +0800 From: wanghuizhao To: , , , , , CC: , , , , , , , , , , , , , Subject: [PATCH 2/2] libselinux: performance optimization for duplicate detection Date: Thu, 9 Feb 2023 19:42:53 +0800 Message-ID: <20230209114253.120485-3-wanghuizhao1@huawei.com> X-Mailer: git-send-email 2.12.3 In-Reply-To: <20230209114253.120485-1-wanghuizhao1@huawei.com> References: <20230209114253.120485-1-wanghuizhao1@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.136.158] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To kwepemi500007.china.huawei.com (7.221.188.207) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: selinux@vger.kernel.org When semodule -i some.pp to install a module package, duplicate items are detected for the module. The detection function is nodups_specs in libselinux/src/label_file.c. The algorithm complexity of implementing this function is O(N^2). In scenarios where N is very large, the efficiency is very low. To solve this problem, I propose to use the hash table to detect duplicates. The algorithm complexity of new implementing is O(N). The execution efficiency will be greatly improved. Comparison between the execution time of the nodups_specs function. 
Old double-layer loop implementation O(N^2):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.242s
nodups_specs end: 11785.588s
nodups_specs consumes: 0.346s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11804.280s
nodups_specs end: 11806.546s
nodups_specs consumes: 2.266s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11819.106s
nodups_specs end: 11830.892s
nodups_specs consumes: 11.786s

New hash table implementation O(N):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.588s
nodups_specs end: 11785.590s
nodups_specs consumes: 0.002s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11806.546s
nodups_specs end: 11806.552s
nodups_specs consumes: 0.006s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11830.892s
nodups_specs end: 11830.905s
nodups_specs consumes: 0.013s

Signed-off-by: wanghuizhao
---
 libselinux/src/label_file.c | 145 +++++++++++++++++++++++++++++++++---------
 libselinux/src/label_file.h |   6 ++
 2 files changed, 124 insertions(+), 27 deletions(-)

diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 74ae9b9f..e4a85043 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -19,6 +19,7 @@
 #include
 #include
 
+#include "hashtab.h"
 #include "callbacks.h"
 #include "label_internal.h"
 #include "label_file.h"
@@ -57,40 +58,130 @@ static int find_stem_from_file(struct saved_data *data, const char *key)
 }
 
 /*
+ * Hash-table callbacks used by nodups_specs() to detect duplicate specs.
+ */
+
+/*
+ * Hash a duplicate-check key into a bucket index in [0, h->size).
+ *
+ * Only the regex string is hashed.  symcmp() treats a mode of 0 as
+ * matching every mode, so the mode must not influence bucket selection:
+ * otherwise a mode-less spec and a mode-specific spec with the same
+ * regex could land in different buckets and never be compared.
+ */
+static unsigned int symhash(hashtab_t h, const_hashtab_key_t key)
+{
+	const struct chkdups_key *k = (const struct chkdups_key *)key;
+	const unsigned int top = 8 * sizeof(unsigned int) - 4;
+	const char *p;
+	unsigned int val = 0;
+
+	for (p = k->regex; *p != '\0'; p++)
+		val = ((val << 4) | (val >> top)) ^ (unsigned char)*p;
+	return val % h->size;
+}
+
+/*
+ * Compare two duplicate-check keys.
+ *
+ * Returns 0 when the keys conflict (same regex, and either mode is 0 or
+ * both modes are equal) and 1 otherwise.  The result only distinguishes
+ * equal from unequal; it does not define an ordering.
+ */
+static int symcmp(hashtab_t h
+		  __attribute__ ((unused)), const_hashtab_key_t key1,
+		  const_hashtab_key_t key2)
+{
+	const struct chkdups_key *a = (const struct chkdups_key *)key1;
+	const struct chkdups_key *b = (const struct chkdups_key *)key2;
+
+	return strcmp(a->regex, b->regex) || (a->mode && b->mode && a->mode != b->mode);
+}
+
+/*
  * Warn about duplicate specifications.
  */
 static int nodups_specs(struct saved_data *data, const char *path)
 {
-	int rc = 0;
-	unsigned int ii, jj;
+	int rc = 0, ret = 0, err = 0;
+	unsigned int ii;
 	struct spec *curr_spec, *spec_arr = data->spec_arr;
+	struct chkdups_key *new = NULL;
+	hashtab_ptr_t cur;
+	hashtab_t hash_table;
 
+	/* Nothing can be duplicated; also avoids a zero-sized hash table. */
+	if (data->nspec == 0)
+		return 0;
+
+	hash_table = hashtab_create(symhash, symcmp, data->nspec);
+	if (hash_table == NULL) {
+		rc = -1;
+		COMPAT_LOG(SELINUX_ERROR, "%s: hashtab create failed.\n", path);
+		return rc;
+	}
+
 	for (ii = 0; ii < data->nspec; ii++) {
-		curr_spec = &spec_arr[ii];
-		for (jj = ii + 1; jj < data->nspec; jj++) {
-			if ((!strcmp(spec_arr[jj].regex_str,
-				curr_spec->regex_str))
-			    && (!spec_arr[jj].mode || !curr_spec->mode
-				|| spec_arr[jj].mode == curr_spec->mode)) {
-				rc = -1;
-				errno = EINVAL;
-				if (strcmp(spec_arr[jj].lr.ctx_raw,
-					    curr_spec->lr.ctx_raw)) {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple different specifications for %s (%s and %s).\n",
-						 path, curr_spec->regex_str,
-						 spec_arr[jj].lr.ctx_raw,
-						 curr_spec->lr.ctx_raw);
-				} else {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple same specifications for %s.\n",
-						 path, curr_spec->regex_str);
-				}
-			}
-		}
-	}
+		new = malloc(sizeof(*new));
+		if (new == NULL) {
+			ret = HASHTAB_OVERFLOW;
+		} else {
+			new->regex = spec_arr[ii].regex_str;
+			new->mode = spec_arr[ii].mode;
+			ret = hashtab_insert(hash_table, (hashtab_key_t)new,
+					     &spec_arr[ii]);
+		}
+		if (ret == HASHTAB_SUCCESS)
+			continue;	/* the table now owns the key */
+
+		if (ret == HASHTAB_PRESENT) {
+			curr_spec = hashtab_search(hash_table, (hashtab_key_t)new);
+			rc = -1;
+			err = EINVAL;
+			if (strcmp(spec_arr[ii].lr.ctx_raw, curr_spec->lr.ctx_raw)) {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple different specifications for %s (%s and %s).\n",
+					 path, curr_spec->regex_str,
+					 spec_arr[ii].lr.ctx_raw,
+					 curr_spec->lr.ctx_raw);
+			} else {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple same specifications for %s.\n",
+					 path, curr_spec->regex_str);
+			}
+		}
+
+		/* The key was not stored in the table, so release it here. */
+		free(new);
+
+		if (ret == HASHTAB_OVERFLOW) {
+			rc = -1;
+			err = ENOMEM;
+			COMPAT_LOG
+				(SELINUX_ERROR,
+				 "%s: hashtab happen memory error.\n",
+				 path);
+			break;
+		}
+	}
+
+	/*
+	 * Free the keys allocated above.  Walk every bucket: the table was
+	 * created with data->nspec slots.  hashtab_destroy() then frees the
+	 * nodes and the table itself.
+	 */
+	for (ii = 0; ii < hash_table->size; ii++) {
+		for (cur = hash_table->htable[ii]; cur != NULL; cur = cur->next) {
+			free(cur->key);
+			cur->key = NULL;
+		}
+	}
+	hashtab_destroy(hash_table);
+
+	if (rc)
+		errno = err;
 	return rc;
 }
 
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 190bc175..ad79319e 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -35,6 +35,12 @@
 /* Required selinux_restorecon and selabel_get_digests_all_partial_matches() */
 #define RESTORECON_PARTIAL_MATCH_DIGEST "security.sehash"
 
+/* Key used by nodups_specs() to look up potentially duplicate specs. */
+struct chkdups_key {
+	char *regex;
+	unsigned int mode;
+};
+
 struct selabel_sub {
 	char *src;
 	int slen;