diff mbox series

[RFC] selinux: assorted hash table improvements

Message ID 20231114061656.557513-2-paul@paul-moore.com (mailing list archive)
State New, archived
Delegated to: Paul Moore
Headers show
Series [RFC] selinux: assorted hash table improvements | expand

Commit Message

Paul Moore Nov. 14, 2023, 6:16 a.m. UTC
From: Linus Torvalds <torvalds@linux-foundation.org>

This is an inline patch version of a patch submitted by Linus in the
thread below.  Do not merge this patch anywhere without additional
testing or a proper commit message.

* https://lore.kernel.org/selinux/20231111160954.45911-2-paul@paul-moore.com
---
 security/selinux/ss/hashtab.c  | 78 +++++++++++++++++-----------------
 security/selinux/ss/hashtab.h  | 41 ++++++++++--------
 security/selinux/ss/policydb.c |  2 +-
 3 files changed, 64 insertions(+), 57 deletions(-)

Comments

Linus Torvalds Nov. 14, 2023, 5:38 p.m. UTC | #1
On Mon, 13 Nov 2023 at 22:17, Paul Moore <paul@paul-moore.com> wrote:
>
> From: Linus Torvalds <torvalds@linux-foundation.org>
>
> This is an inline patch version of a patch submitted by Linus in the
> thread below.  Do not merge this patch anywhere without additional
> testing or a proper commit message.
>
> * https://lore.kernel.org/selinux/20231111160954.45911-2-paul@paul-moore.com

A couple of notes on this.

The *point* of the patch basically boils down to this particular change:

> +static inline struct hashtab_node **hashtab_entry(struct hashtab *h,
> +       const void *key, const struct hashtab_key_params key_params)
> +{
> +       return h->htable + hash_32(key_params.hash(key), h->hbits);
> +}
...
> -        ... key_params.hash(key) & (h->size - 1);
> +       ... hashtab_entry(h, key, key_params);

which basically means that now instead of just using the low bits of
whatever hash value, it actually uses all bits of the 32-bit hash.

It doesn't much matter as things stand now, because

 (a) I'm not convinced of how performance-critical this is (there is
*very* performance-critical hashing in selinux, but it's mainly the
AVC lookup which doesn't use the hashtab model at all)

 (b) a lot of the hashing intentionally tries to put the bits in low
bits - exactly *because* this code used to just use the low bits. IOW,
we have things like this:

    static u32 rangetr_hash(const void *k)
    ...
        return key->source_type + (key->target_type << 3) +
                (key->target_class << 5);

which basically tries pretty hard to keep things in the low bits
rather than spread things out too much or too well

 (c) the code hasn't ever done a good job before

For example, with the old "filenametr_hash()" it used
partial_name_hash(), but then never did the final end_name_hash(), so
it was never even folded down to an u32, and the high bits of the hash
on a 64-bit machine were dropped entirely even before it then only
used the low bits thanks to that '& (h->size-1)' masking.

Anyway, for this patch to really matter, the hashing itself should
matter (questionable) and the existing hash functions like that
rangetr_hash() and filenametr_hash() should be better than they are.

Paul already sent out a patch to improve on filenametr_hash(),
although it is the case that full_name_hash() is really meant for
*counted* strings, and since filenametr_hash() has to do the
'strlen()' just to see how lonbg the string is, it's not exactly
efficient there either.

Oh well. We do have a "hash and count length a word at a time"
function, but that one is for pathnames, and stops at '/' characters
in addition to terminating NUL characters.

                Linus
diff mbox series

Patch

diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index c05d8346a94a..b4bc1aebebe6 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -24,25 +24,27 @@  static struct kmem_cache *hashtab_node_cachep __ro_after_init;
  * The total memory used by the htable arrays (only) with Fedora policy loaded
  * is approximately 163 KB at the time of writing.
  */
-static u32 hashtab_compute_size(u32 nel)
+static u32 hashtab_compute_hbits(u32 nel)
 {
-	return nel == 0 ? 0 : roundup_pow_of_two(nel);
+	if (nel <= 1)
+		return nel;
+	return ilog2(nel - 1) + 1;
 }
 
 int hashtab_init(struct hashtab *h, u32 nel_hint)
 {
-	u32 size = hashtab_compute_size(nel_hint);
+	u32 hbits = hashtab_compute_hbits(nel_hint);
 
 	/* should already be zeroed, but better be safe */
 	h->nel = 0;
-	h->size = 0;
+	h->hbits = 0;
 	h->htable = NULL;
 
-	if (size) {
-		h->htable = kcalloc(size, sizeof(*h->htable), GFP_KERNEL);
+	if (hbits) {
+		h->htable = kcalloc(1 << hbits, sizeof(*h->htable), GFP_KERNEL);
 		if (!h->htable)
 			return -ENOMEM;
-		h->size = size;
+		h->hbits = hbits;
 	}
 	return 0;
 }
@@ -66,10 +68,11 @@  int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
 
 void hashtab_destroy(struct hashtab *h)
 {
-	u32 i;
-	struct hashtab_node *cur, *temp;
+	u32 size = hashtab_size(h);
+
+	for (u32 i = 0; i < size; i++) {
+		struct hashtab_node *cur, *temp;
 
-	for (i = 0; i < h->size; i++) {
 		cur = h->htable[i];
 		while (cur) {
 			temp = cur;
@@ -81,20 +84,19 @@  void hashtab_destroy(struct hashtab *h)
 
 	kfree(h->htable);
 	h->htable = NULL;
+	h->hbits = 0;
 }
 
 int hashtab_map(struct hashtab *h,
 		int (*apply)(void *k, void *d, void *args),
 		void *args)
 {
-	u32 i;
-	int ret;
-	struct hashtab_node *cur;
+	u32 size = hashtab_size(h);
 
-	for (i = 0; i < h->size; i++) {
-		cur = h->htable[i];
+	for (u32 i = 0; i < size; i++) {
+		struct hashtab_node *cur = cur = h->htable[i];
 		while (cur) {
-			ret = apply(cur->key, cur->datum, args);
+			int ret = apply(cur->key, cur->datum, args);
 			if (ret)
 				return ret;
 			cur = cur->next;
@@ -106,18 +108,18 @@  int hashtab_map(struct hashtab *h,
 #ifdef CONFIG_SECURITY_SELINUX_DEBUG
 void hashtab_stat(struct hashtab *h, struct hashtab_info *info)
 {
-	u32 i, chain_len, slots_used, max_chain_len;
+	u32 size = hashtab_size(h);
+	u32 slots_used, max_chain_len;
 	u64 chain2_len_sum;
-	struct hashtab_node *cur;
 
 	slots_used = 0;
 	max_chain_len = 0;
 	chain2_len_sum = 0;
-	for (i = 0; i < h->size; i++) {
-		cur = h->htable[i];
+	for (u32 i = 0; i < size; i++) {
+		struct hashtab_node *cur = h->htable[i];
 		if (cur) {
+			u32 chain_len = 0;
 			slots_used++;
-			chain_len = 0;
 			while (cur) {
 				chain_len++;
 				cur = cur->next;
@@ -142,36 +144,35 @@  int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
 		int (*destroy)(void *k, void *d, void *args),
 		void *args)
 {
-	struct hashtab_node *cur, *tmp, *tail;
-	u32 i;
-	int rc;
+	u32 size = hashtab_size(orig);
 
 	memset(new, 0, sizeof(*new));
 
-	new->htable = kcalloc(orig->size, sizeof(*new->htable), GFP_KERNEL);
-	if (!new->htable)
-		return -ENOMEM;
+	if (size) {
+		new->htable = kcalloc(size, sizeof(*new->htable), GFP_KERNEL);
+		if (!new->htable)
+			return -ENOMEM;
+		new->hbits = orig->hbits;
+	}
 
-	new->size = orig->size;
+	for (u32 i = 0; i < size; i++) {
+		struct hashtab_node *cur, **tailp;
+		tailp = new->htable + i;
 
-	for (i = 0; i < orig->size; i++) {
-		tail = NULL;
 		for (cur = orig->htable[i]; cur; cur = cur->next) {
+			struct hashtab_node *tmp;
+
 			tmp = kmem_cache_zalloc(hashtab_node_cachep,
 						GFP_KERNEL);
 			if (!tmp)
 				goto error;
-			rc = copy(tmp, cur, args);
-			if (rc) {
+			if (copy(tmp, cur, args)) {
 				kmem_cache_free(hashtab_node_cachep, tmp);
 				goto error;
 			}
 			tmp->next = NULL;
-			if (!tail)
-				new->htable[i] = tmp;
-			else
-				tail->next = tmp;
-			tail = tmp;
+			*tailp = tmp;
+			tailp = &tmp->next;
 			new->nel++;
 		}
 	}
@@ -179,7 +180,8 @@  int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
 	return 0;
 
  error:
-	for (i = 0; i < new->size; i++) {
+	for (u32 i = 0; i < size; i++) {
+		struct hashtab_node *cur, *tmp;
 		for (cur = new->htable[i]; cur; cur = tmp) {
 			tmp = cur->next;
 			destroy(cur->key, cur->datum, args);
diff --git a/security/selinux/ss/hashtab.h b/security/selinux/ss/hashtab.h
index 09b0a3744937..6b65c9a52559 100644
--- a/security/selinux/ss/hashtab.h
+++ b/security/selinux/ss/hashtab.h
@@ -14,6 +14,7 @@ 
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/hash.h>
 
 #define HASHTAB_MAX_NODES	U32_MAX
 
@@ -31,7 +32,7 @@  struct hashtab_node {
 
 struct hashtab {
 	struct hashtab_node **htable;	/* hash table */
-	u32 size;			/* number of slots in hash table */
+	u32 hbits;			/* number of slots in hash table */
 	u32 nel;			/* number of elements in hash table */
 };
 
@@ -41,6 +42,11 @@  struct hashtab_info {
 	u64 chain2_len_sum;
 };
 
+static inline u32 hashtab_size(const struct hashtab *h)
+{
+	return h->hbits ? 1 << h->hbits : 0;
+}
+
 /*
  * Initializes a new hash table with the specified characteristics.
  *
@@ -51,6 +57,12 @@  int hashtab_init(struct hashtab *h, u32 nel_hint);
 int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
 		     void *key, void *datum);
 
+static inline struct hashtab_node **hashtab_entry(struct hashtab *h,
+	const void *key, const struct hashtab_key_params key_params)
+{
+	return h->htable + hash_32(key_params.hash(key), h->hbits);
+}
+
 /*
  * Inserts the specified (key, datum) pair into the specified hash table.
  *
@@ -60,32 +72,27 @@  int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
   0 otherwise.
  */
 static inline int hashtab_insert(struct hashtab *h, void *key, void *datum,
-				 struct hashtab_key_params key_params)
+				 const struct hashtab_key_params key_params)
 {
-	u32 hvalue;
-	struct hashtab_node *prev, *cur;
+	struct hashtab_node **pprev;
 
 	cond_resched();
 
-	if (!h->size || h->nel == HASHTAB_MAX_NODES)
+	if (!h->hbits || h->nel == HASHTAB_MAX_NODES)
 		return -EINVAL;
 
-	hvalue = key_params.hash(key) & (h->size - 1);
-	prev = NULL;
-	cur = h->htable[hvalue];
-	while (cur) {
+	pprev = hashtab_entry(h, key, key_params);
+	for (struct hashtab_node *cur; (cur = *pprev) != NULL; ) {
 		int cmp = key_params.cmp(key, cur->key);
 
 		if (cmp == 0)
 			return -EEXIST;
 		if (cmp < 0)
 			break;
-		prev = cur;
-		cur = cur->next;
+		pprev = &cur->next;
 	}
 
-	return __hashtab_insert(h, prev ? &prev->next : &h->htable[hvalue],
-				key, datum);
+	return __hashtab_insert(h, pprev, key, datum);
 }
 
 /*
@@ -95,16 +102,14 @@  static inline int hashtab_insert(struct hashtab *h, void *key, void *datum,
  * the datum of the entry otherwise.
  */
 static inline void *hashtab_search(struct hashtab *h, const void *key,
-				   struct hashtab_key_params key_params)
+				   const struct hashtab_key_params key_params)
 {
-	u32 hvalue;
 	struct hashtab_node *cur;
 
-	if (!h->size)
+	if (!h->hbits)
 		return NULL;
 
-	hvalue = key_params.hash(key) & (h->size - 1);
-	cur = h->htable[hvalue];
+	cur = *hashtab_entry(h, key, key_params);
 	while (cur) {
 		int cmp = key_params.cmp(key, cur->key);
 
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 595a435ea9c8..61bc82b2cea6 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -685,7 +685,7 @@  static void hash_eval(struct hashtab *h, const char *hash_name)
 
 	hashtab_stat(h, &info);
 	pr_debug("SELinux: %s:  %d entries and %d/%d buckets used, longest chain length %d, sum of chain length^2 %llu\n",
-		 hash_name, h->nel, info.slots_used, h->size,
+		 hash_name, h->nel, info.slots_used, hashtab_size(h),
 		 info.max_chain_len, info.chain2_len_sum);
 }