diff mbox

[v2,rdma-next,11/29] IB/rxe: Allocation pool for RDMA objects

Message ID 1466084751-29901-12-git-send-email-monis@mellanox.com (mailing list archive)
State Accepted
Headers show

Commit Message

Moni Shoua June 16, 2016, 1:45 p.m. UTC
Manage and allocate pool of objects with given limit on number of
elements.  Gets parameters from rxe_type_info. Pool elements are
allocated out of a slab cache.  Objects that are using this facility
are: PD, QP, SRQ, CQ, MR, FMR, MW, etc.

Signed-off-by: Kamal Heib <kamalh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Haggai Eran <haggaie@mellanox.com>
---
 drivers/infiniband/hw/rxe/rxe_pool.c | 510 +++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/rxe/rxe_pool.h | 164 +++++++++++
 2 files changed, 674 insertions(+)
 create mode 100644 drivers/infiniband/hw/rxe/rxe_pool.c
 create mode 100644 drivers/infiniband/hw/rxe/rxe_pool.h
diff mbox

Patch

diff --git a/drivers/infiniband/hw/rxe/rxe_pool.c b/drivers/infiniband/hw/rxe/rxe_pool.c
new file mode 100644
index 0000000..cd37d4f
--- /dev/null
+++ b/drivers/infiniband/hw/rxe/rxe_pool.c
@@ -0,0 +1,510 @@ 
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *	   Redistribution and use in source and binary forms, with or
+ *	   without modification, are permitted provided that the following
+ *	   conditions are met:
+ *
+ *		- Redistributions of source code must retain the above
+ *		  copyright notice, this list of conditions and the following
+ *		  disclaimer.
+ *
+ *		- Redistributions in binary form must reproduce the above
+ *		  copyright notice, this list of conditions and the following
+ *		  disclaimer in the documentation and/or other materials
+ *		  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/* info about object pools
+ * note that mr, fmr and mw share a single index space
+ * so that one can map an lkey to the correct type of object
+ */
+struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
+	[RXE_TYPE_UC] = {
+		.name		= "rxe-uc",
+		.size		= sizeof(struct rxe_ucontext),
+	},
+	[RXE_TYPE_PD] = {
+		.name		= "rxe-pd",
+		.size		= sizeof(struct rxe_pd),
+	},
+	[RXE_TYPE_AH] = {
+		.name		= "rxe-ah",
+		.size		= sizeof(struct rxe_ah),
+		.flags		= RXE_POOL_ATOMIC,
+	},
+	[RXE_TYPE_SRQ] = {
+		.name		= "rxe-srq",
+		.size		= sizeof(struct rxe_srq),
+		.flags		= RXE_POOL_INDEX,
+		.min_index	= RXE_MIN_SRQ_INDEX,
+		.max_index	= RXE_MAX_SRQ_INDEX,
+	},
+	[RXE_TYPE_QP] = {
+		.name		= "rxe-qp",
+		.size		= sizeof(struct rxe_qp),
+		.cleanup	= rxe_qp_cleanup,
+		.flags		= RXE_POOL_INDEX,
+		.min_index	= RXE_MIN_QP_INDEX,
+		.max_index	= RXE_MAX_QP_INDEX,
+	},
+	[RXE_TYPE_CQ] = {
+		.name		= "rxe-cq",
+		.size		= sizeof(struct rxe_cq),
+		.cleanup	= rxe_cq_cleanup,
+	},
+	[RXE_TYPE_MR] = {
+		.name		= "rxe-mr",
+		.size		= sizeof(struct rxe_mem),
+		.cleanup	= rxe_mem_cleanup,
+		.flags		= RXE_POOL_INDEX,
+		.max_index	= RXE_MAX_MR_INDEX,
+		.min_index	= RXE_MIN_MR_INDEX,
+	},
+	[RXE_TYPE_FMR] = {
+		.name		= "rxe-fmr",
+		.size		= sizeof(struct rxe_mem),
+		.cleanup	= rxe_mem_cleanup,
+		.flags		= RXE_POOL_INDEX,
+		.max_index	= RXE_MAX_FMR_INDEX,
+		.min_index	= RXE_MIN_FMR_INDEX,
+	},
+	[RXE_TYPE_MW] = {
+		.name		= "rxe-mw",
+		.size		= sizeof(struct rxe_mem),
+		.flags		= RXE_POOL_INDEX,
+		.max_index	= RXE_MAX_MW_INDEX,
+		.min_index	= RXE_MIN_MW_INDEX,
+	},
+	[RXE_TYPE_MC_GRP] = {
+		.name		= "rxe-mc_grp",
+		.size		= sizeof(struct rxe_mc_grp),
+		.cleanup	= rxe_mc_cleanup,
+		.flags		= RXE_POOL_KEY,
+		.key_offset	= offsetof(struct rxe_mc_grp, mgid),
+		.key_size	= sizeof(union ib_gid),
+	},
+	[RXE_TYPE_MC_ELEM] = {
+		.name		= "rxe-mc_elem",
+		.size		= sizeof(struct rxe_mc_elem),
+		.flags		= RXE_POOL_ATOMIC,
+	},
+};
+
+static inline char *pool_name(struct rxe_pool *pool)
+{
+	return rxe_type_info[pool->type].name;
+}
+
+static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
+{
+	return rxe_type_info[pool->type].cache;
+}
+
+static inline enum rxe_elem_type rxe_type(void *arg)
+{
+	struct rxe_pool_entry *elem = arg;
+
+	return elem->pool->type;
+}
+
+int rxe_cache_init(void)
+{
+	int err;
+	int i;
+	size_t size;
+	struct rxe_type_info *type;
+
+	for (i = 0; i < RXE_NUM_TYPES; i++) {
+		type = &rxe_type_info[i];
+		size = ALIGN(type->size, RXE_POOL_ALIGN);
+		type->cache = kmem_cache_create(type->name, size,
+				RXE_POOL_ALIGN,
+				RXE_POOL_CACHE_FLAGS, NULL);
+		if (!type->cache) {
+			pr_err("Unable to init kmem cache for %s\n",
+			       type->name);
+			err = -ENOMEM;
+			goto err1;
+		}
+	}
+
+	return 0;
+
+err1:
+	while (--i >= 0) {
+		kmem_cache_destroy(type->cache);
+		type->cache = NULL;
+	}
+
+	return err;
+}
+
+void rxe_cache_exit(void)
+{
+	int i;
+	struct rxe_type_info *type;
+
+	for (i = 0; i < RXE_NUM_TYPES; i++) {
+		type = &rxe_type_info[i];
+		kmem_cache_destroy(type->cache);
+		type->cache = NULL;
+	}
+}
+
+static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
+{
+	int err = 0;
+	size_t size;
+
+	if ((max - min + 1) < pool->max_elem) {
+		pr_warn("not enough indices for max_elem\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	pool->max_index = max;
+	pool->min_index = min;
+
+	size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
+	pool->table = kmalloc(size, GFP_KERNEL);
+	if (!pool->table) {
+		pr_warn("no memory for bit table\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	pool->table_size = size;
+	bitmap_zero(pool->table, max - min + 1);
+
+out:
+	return err;
+}
+
+int rxe_pool_init(
+	struct rxe_dev		*rxe,
+	struct rxe_pool		*pool,
+	enum rxe_elem_type	type,
+	unsigned		max_elem)
+{
+	int			err = 0;
+	size_t			size = rxe_type_info[type].size;
+
+	memset(pool, 0, sizeof(*pool));
+
+	pool->rxe		= rxe;
+	pool->type		= type;
+	pool->max_elem		= max_elem;
+	pool->elem_size		= ALIGN(size, RXE_POOL_ALIGN);
+	pool->flags		= rxe_type_info[type].flags;
+	pool->tree		= RB_ROOT;
+	pool->cleanup		= rxe_type_info[type].cleanup;
+
+	atomic_set(&pool->num_elem, 0);
+
+	kref_init(&pool->ref_cnt);
+
+	spin_lock_init(&pool->pool_lock);
+
+	if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
+		err = rxe_pool_init_index(pool,
+					  rxe_type_info[type].max_index,
+					  rxe_type_info[type].min_index);
+		if (err)
+			goto out;
+	}
+
+	if (rxe_type_info[type].flags & RXE_POOL_KEY) {
+		pool->key_offset = rxe_type_info[type].key_offset;
+		pool->key_size = rxe_type_info[type].key_size;
+	}
+
+	pool->state = rxe_pool_valid;
+
+out:
+	return err;
+}
+
+static void rxe_pool_release(struct kref *kref)
+{
+	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
+
+	pool->state = rxe_pool_invalid;
+	kfree(pool->table);
+}
+
+static void rxe_pool_put(struct rxe_pool *pool)
+{
+	kref_put(&pool->ref_cnt, rxe_pool_release);
+}
+
+int rxe_pool_cleanup(struct rxe_pool *pool)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	pool->state = rxe_pool_invalid;
+	if (atomic_read(&pool->num_elem) > 0)
+		pr_warn("%s pool destroyed with unfree'd elem\n",
+			pool_name(pool));
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+	rxe_pool_put(pool);
+
+	return 0;
+}
+
+static u32 alloc_index(struct rxe_pool *pool)
+{
+	u32 index;
+	u32 range = pool->max_index - pool->min_index + 1;
+
+	index = find_next_zero_bit(pool->table, range, pool->last);
+	if (index >= range)
+		index = find_first_zero_bit(pool->table, range);
+
+	set_bit(index, pool->table);
+	pool->last = index;
+	return index + pool->min_index;
+}
+
+static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
+{
+	struct rb_node **link = &pool->tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct rxe_pool_entry *elem;
+
+	while (*link) {
+		parent = *link;
+		elem = rb_entry(parent, struct rxe_pool_entry, node);
+
+		if (elem->index == new->index) {
+			pr_warn("element already exists!\n");
+			goto out;
+		}
+
+		if (elem->index > new->index)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &pool->tree);
+out:
+	return;
+}
+
+static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+{
+	struct rb_node **link = &pool->tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct rxe_pool_entry *elem;
+	int cmp;
+
+	while (*link) {
+		parent = *link;
+		elem = rb_entry(parent, struct rxe_pool_entry, node);
+
+		cmp = memcmp((u8 *)elem + pool->key_offset,
+			     (u8 *)new + pool->key_offset, pool->key_size);
+
+		if (cmp == 0) {
+			pr_warn("key already exists!\n");
+			goto out;
+		}
+
+		if (cmp > 0)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &pool->tree);
+out:
+	return;
+}
+
+void rxe_add_key(void *arg, void *key)
+{
+	struct rxe_pool_entry *elem = arg;
+	struct rxe_pool *pool = elem->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
+	insert_key(pool, elem);
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_drop_key(void *arg)
+{
+	struct rxe_pool_entry *elem = arg;
+	struct rxe_pool *pool = elem->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	rb_erase(&elem->node, &pool->tree);
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_add_index(void *arg)
+{
+	struct rxe_pool_entry *elem = arg;
+	struct rxe_pool *pool = elem->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	elem->index = alloc_index(pool);
+	insert_index(pool, elem);
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_drop_index(void *arg)
+{
+	struct rxe_pool_entry *elem = arg;
+	struct rxe_pool *pool = elem->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	clear_bit(elem->index - pool->min_index, pool->table);
+	rb_erase(&elem->node, &pool->tree);
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void *rxe_alloc(struct rxe_pool *pool)
+{
+	struct rxe_pool_entry *elem;
+	unsigned long flags;
+
+	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+	if (pool->state != rxe_pool_valid) {
+		spin_unlock_irqrestore(&pool->pool_lock, flags);
+		return NULL;
+	}
+	kref_get(&pool->ref_cnt);
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+	kref_get(&pool->rxe->ref_cnt);
+
+	if (atomic_inc_return(&pool->num_elem) > pool->max_elem) {
+		atomic_dec(&pool->num_elem);
+		rxe_dev_put(pool->rxe);
+		rxe_pool_put(pool);
+		return NULL;
+	}
+
+	elem = kmem_cache_zalloc(pool_cache(pool),
+				 (pool->flags & RXE_POOL_ATOMIC) ?
+				 GFP_ATOMIC : GFP_KERNEL);
+
+	elem->pool = pool;
+	kref_init(&elem->ref_cnt);
+
+	return elem;
+}
+
+void rxe_elem_release(struct kref *kref)
+{
+	struct rxe_pool_entry *elem =
+		container_of(kref, struct rxe_pool_entry, ref_cnt);
+	struct rxe_pool *pool = elem->pool;
+
+	if (pool->cleanup)
+		pool->cleanup(elem);
+
+	kmem_cache_free(pool_cache(pool), elem);
+	atomic_dec(&pool->num_elem);
+	rxe_dev_put(pool->rxe);
+	rxe_pool_put(pool);
+}
+
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
+{
+	struct rb_node *node = NULL;
+	struct rxe_pool_entry *elem = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+
+	if (pool->state != rxe_pool_valid)
+		goto out;
+
+	node = pool->tree.rb_node;
+
+	while (node) {
+		elem = rb_entry(node, struct rxe_pool_entry, node);
+
+		if (elem->index > index)
+			node = node->rb_left;
+		else if (elem->index < index)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (node)
+		kref_get(&elem->ref_cnt);
+
+out:
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	return node ? (void *)elem : NULL;
+}
+
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+{
+	struct rb_node *node = NULL;
+	struct rxe_pool_entry *elem = NULL;
+	int cmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->pool_lock, flags);
+
+	if (pool->state != rxe_pool_valid)
+		goto out;
+
+	node = pool->tree.rb_node;
+
+	while (node) {
+		elem = rb_entry(node, struct rxe_pool_entry, node);
+
+		cmp = memcmp((u8 *)elem + pool->key_offset,
+			     key, pool->key_size);
+
+		if (cmp > 0)
+			node = node->rb_left;
+		else if (cmp < 0)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (node)
+		kref_get(&elem->ref_cnt);
+
+out:
+	spin_unlock_irqrestore(&pool->pool_lock, flags);
+	return node ? ((void *)elem) : NULL;
+}
diff --git a/drivers/infiniband/hw/rxe/rxe_pool.h b/drivers/infiniband/hw/rxe/rxe_pool.h
new file mode 100644
index 0000000..43476b5
--- /dev/null
+++ b/drivers/infiniband/hw/rxe/rxe_pool.h
@@ -0,0 +1,164 @@ 
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *	   Redistribution and use in source and binary forms, with or
+ *	   without modification, are permitted provided that the following
+ *	   conditions are met:
+ *
+ *		- Redistributions of source code must retain the above
+ *		  copyright notice, this list of conditions and the following
+ *		  disclaimer.
+ *
+ *		- Redistributions in binary form must reproduce the above
+ *		  copyright notice, this list of conditions and the following
+ *		  disclaimer in the documentation and/or other materials
+ *		  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_POOL_H
+#define RXE_POOL_H
+
+#define RXE_POOL_ALIGN		(16)
+#define RXE_POOL_CACHE_FLAGS	(0)
+
+enum rxe_pool_flags {
+	RXE_POOL_ATOMIC		= BIT(0),
+	RXE_POOL_INDEX		= BIT(1),
+	RXE_POOL_KEY		= BIT(2),
+};
+
+enum rxe_elem_type {
+	RXE_TYPE_UC,
+	RXE_TYPE_PD,
+	RXE_TYPE_AH,
+	RXE_TYPE_SRQ,
+	RXE_TYPE_QP,
+	RXE_TYPE_CQ,
+	RXE_TYPE_MR,
+	RXE_TYPE_MW,
+	RXE_TYPE_FMR,
+	RXE_TYPE_MC_GRP,
+	RXE_TYPE_MC_ELEM,
+	RXE_NUM_TYPES,		/* keep me last */
+};
+
+struct rxe_type_info {
+	char			*name;
+	size_t			size;
+	void			(*cleanup)(void *obj);
+	enum rxe_pool_flags	flags;
+	u32			max_index;
+	u32			min_index;
+	size_t			key_offset;
+	size_t			key_size;
+	struct kmem_cache	*cache;
+};
+
+extern struct rxe_type_info rxe_type_info[];
+
+enum rxe_pool_state {
+	rxe_pool_invalid,
+	rxe_pool_valid,
+};
+
+struct rxe_pool_entry {
+	struct rxe_pool		*pool;
+	struct kref		ref_cnt;
+	struct list_head	list;
+
+	/* only used if indexed or keyed */
+	struct rb_node		node;
+	u32			index;
+};
+
+struct rxe_pool {
+	struct rxe_dev		*rxe;
+	spinlock_t              pool_lock; /* pool spinlock */
+	size_t			elem_size;
+	struct kref		ref_cnt;
+	void			(*cleanup)(void *obj);
+	enum rxe_pool_state	state;
+	enum rxe_pool_flags	flags;
+	enum rxe_elem_type	type;
+
+	unsigned int		max_elem;
+	atomic_t		num_elem;
+
+	/* only used if indexed or keyed */
+	struct rb_root		tree;
+	unsigned long		*table;
+	size_t			table_size;
+	u32			max_index;
+	u32			min_index;
+	u32			last;
+	size_t			key_offset;
+	size_t			key_size;
+};
+
+/* initialize slab caches for managed objects */
+int rxe_cache_init(void);
+
+/* cleanup slab caches for managed objects */
+void rxe_cache_exit(void);
+
+/* initialize a pool of objects with given limit on
+ * number of elements. gets parameters from rxe_type_info
+ * pool elements will be allocated out of a slab cache
+ */
+int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+		  enum rxe_elem_type type, u32 max_elem);
+
+/* free resources from object pool */
+int rxe_pool_cleanup(struct rxe_pool *pool);
+
+/* allocate an object from pool */
+void *rxe_alloc(struct rxe_pool *pool);
+
+/* assign an index to an indexed object and insert object into
+ *  pool's rb tree
+ */
+void rxe_add_index(void *elem);
+
+/* drop an index and remove object from rb tree */
+void rxe_drop_index(void *elem);
+
+/* assign a key to a keyed object and insert object into
+ *  pool's rb tree
+ */
+void rxe_add_key(void *elem, void *key);
+
+/* remove elem from rb tree */
+void rxe_drop_key(void *elem);
+
+/* lookup an indexed object from index. takes a reference on object */
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
+
+/* lookup keyed object from key. takes a reference on the object */
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
+
+/* cleanup an object when all references are dropped */
+void rxe_elem_release(struct kref *kref);
+
+/* take a reference on an object */
+#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)
+
+/* drop a reference on an object */
+#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)
+
+#endif /* RXE_POOL_H */