@@ -741,10 +741,10 @@ struct mlx5_cache_ent {
unsigned long stored;
unsigned long reserved;
+ struct rb_node node;
struct mlx5r_cache_rb_key rb_key;
char name[4];
- u32 order;
u8 disabled:1;
u8 fill_to_high_water:1;
@@ -775,8 +775,9 @@ struct mlx5r_async_create_mkey {
struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES];
- struct dentry *root;
+ struct rb_root rb_root;
+ struct mutex rb_lock;
+ struct dentry *fs_root;
unsigned long last_add;
};
@@ -1321,6 +1322,8 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
unsigned int access_flags,
@@ -1348,7 +1351,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev);
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags);
@@ -1367,7 +1370,11 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline struct mlx5_cache_ent *
+mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
+{
+ return NULL;
+}
static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags) {}
@@ -514,18 +514,22 @@ static const struct file_operations limit_fops = {
static bool someone_adding(struct mlx5_mkey_cache *cache)
{
- unsigned int i;
-
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- struct mlx5_cache_ent *ent = &cache->ent[i];
- bool ret;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ bool ret;
+ mutex_lock(&cache->rb_lock);
+ for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
ret = ent->stored < ent->limit;
xa_unlock_irq(&ent->mkeys);
- if (ret)
+ if (ret) {
+ mutex_unlock(&cache->rb_lock);
return true;
+ }
}
+ mutex_unlock(&cache->rb_lock);
return false;
}
@@ -589,8 +593,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
if (err != -EAGAIN) {
mlx5_ib_warn(
dev,
- "command failed order %d, err %d\n",
- ent->order, err);
+ "command failed order %s, err %d\n",
+ ent->name, err);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(1000));
}
@@ -636,6 +640,72 @@ static void delayed_cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+ struct mlx5_cache_ent *ent)
+{
+ struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+ struct mlx5_cache_ent *cur;
+ int cmp;
+
+ mutex_lock(&cache->rb_lock);
+ /* Figure out where to put new node */
+ while (*new) {
+ cur = rb_entry(*new, struct mlx5_cache_ent, node);
+ parent = *new;
+ cmp = memcmp(&ent->rb_key, &cur->rb_key,
+ sizeof(struct mlx5r_cache_rb_key));
+ if (cmp < 0)
+ new = &((*new)->rb_left);
+ if (cmp > 0)
+ new = &((*new)->rb_right);
+ if (cmp == 0) {
+ mutex_unlock(&cache->rb_lock);
+ return -EEXIST;
+ }
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&ent->node, parent, new);
+ rb_insert_color(&ent->node, &cache->rb_root);
+
+ mutex_unlock(&cache->rb_lock);
+ return 0;
+}
+
+static struct rb_node *
+mlx5_cache_find_smallest_ent(struct mlx5_mkey_cache *cache,
+ struct mlx5r_cache_rb_key rb_key)
+{
+ struct rb_node *node = cache->rb_root.rb_node;
+ struct mlx5_cache_ent *cur, *smallest = NULL;
+ int cmp;
+
+ /*
+ * Find the smallest ent with ent.rb_key >= rb_key.
+ */
+ while (node) {
+ cur = rb_entry(node, struct mlx5_cache_ent, node);
+
+ cmp = memcmp(&rb_key, &cur->rb_key,
+ sizeof(struct mlx5r_cache_rb_key));
+ if (cmp < 0) {
+ /* cur.rb_key > rb_key */
+ smallest = cur;
+ node = node->rb_left;
+ }
+ if (cmp > 0)
+ node = node->rb_right;
+ if (cmp == 0)
+ return &cur->node;
+ }
+
+ return (smallest &&
+ smallest->rb_key.access_mode == rb_key.access_mode &&
+ smallest->rb_key.access_flags == rb_key.access_flags) ?
+ &smallest->node :
+ NULL;
+}
+
static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
{
xa_lock_irq(&ent->mkeys);
@@ -655,36 +725,41 @@ static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
return true;
}
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
- unsigned int order)
-{
- struct mlx5_mkey_cache *cache = &dev->cache;
-
- if (order < cache->ent[0].order)
- return &cache->ent[0];
- order = order - cache->ent[0].order;
- if (order > MKEY_CACHE_LAST_STD_ENTRY)
- return NULL;
- return &cache->ent[order];
-}
-
static bool mlx5_cache_get_mkey(struct mlx5_ib_dev *dev,
struct mlx5r_cache_rb_key rb_key,
struct mlx5_ib_mr *mr)
{
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ unsigned int order, upper_bound;
struct mlx5_cache_ent *ent;
+ struct rb_node *node;
- if (!mlx5r_umr_can_reconfig(dev, 0, rb_key.access_flags))
- return false;
+ order = order_base_2(rb_key.ndescs) > 2 ?
+ order_base_2(rb_key.ndescs) : 2;
+ upper_bound = 1 << order;
+
+ /*
+ * Find the smallest node within the range with available mkeys.
+ */
+ mutex_lock(&cache->rb_lock);
+ node = mlx5_cache_find_smallest_ent(cache, rb_key);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ if (ent->rb_key.access_mode != rb_key.access_mode ||
+ ent->rb_key.access_flags != rb_key.access_flags ||
+ ent->rb_key.ndescs > upper_bound)
+ break;
- if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
- ent = &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY];
+ if (mlx5_ent_get_mkey(ent, mr)) {
+ mutex_unlock(&cache->rb_lock);
+ return true;
+ }
- ent = mkey_cache_ent_from_order(dev, order_base_2(rb_key.ndescs));
- if (!ent)
- return false;
+ node = rb_next(node);
+ }
+ mutex_unlock(&cache->rb_lock);
- return mlx5_ent_get_mkey(ent, mr);
+ return false;
}
static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
@@ -743,10 +818,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
return mr;
}
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
{
- struct mlx5_mkey_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
u32 mkey;
cancel_delayed_work(&ent->dwork);
@@ -765,26 +838,19 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
if (!mlx5_debugfs_root || dev->is_rep)
return;
- debugfs_remove_recursive(dev->cache.root);
- dev->cache.root = NULL;
+ debugfs_remove_recursive(dev->cache.fs_root);
+ dev->cache.fs_root = NULL;
}
-static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_cache_ent_debugfs_init(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent, int order)
{
struct mlx5_mkey_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
struct dentry *dir;
- int i;
- if (!mlx5_debugfs_root || dev->is_rep)
- return;
-
- cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
-
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- sprintf(ent->name, "%d", ent->order);
- dir = debugfs_create_dir(ent->name, cache->root);
+ if (cache->fs_root) {
+ sprintf(ent->name, "%d", order);
+ dir = debugfs_create_dir(ent->name, cache->fs_root);
debugfs_create_file("size", 0600, dir, ent, &size_fops);
debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
debugfs_create_ulong("cur", 0400, dir, &ent->stored);
@@ -799,68 +865,114 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
-int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key)
+{
+ struct mlx5_cache_ent *ent;
+ int ret;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
+
+ xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
+ ent->rb_key = rb_key;
+ ent->dev = dev;
+
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+ ret = mlx5_cache_ent_insert(&dev->cache, ent);
+ if (ret) {
+ kfree(ent);
+ return ERR_PTR(ret);
+ }
+ return ent;
+}
+
+static int mlx5_cache_init_default_entries(struct mlx5_ib_dev *dev)
{
struct mlx5r_cache_rb_key rb_key = { .access_mode =
MLX5_MKC_ACCESS_MODE_MTT };
struct mlx5_mkey_cache *cache = &dev->cache;
+ bool can_use_cache, need_cache;
struct mlx5_cache_ent *ent;
- int i;
+ int order;
+
+ if (mlx5_debugfs_root && !dev->is_rep)
+ cache->fs_root = debugfs_create_dir(
+ "mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
+
+ can_use_cache = !dev->is_rep && mlx5r_umr_can_load_pas(dev, 0);
+ need_cache = (dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
+ mlx5_core_is_pf(dev->mdev);
+
+ for (order = 2; order <= MKEY_CACHE_LAST_STD_ENTRY + 2; order++) {
+ rb_key.ndescs = 1 << order;
+ ent = mlx5r_cache_create_ent(dev, rb_key);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ mlx5_cache_ent_debugfs_init(dev, ent, order);
+
+ if (can_use_cache && need_cache &&
+ order <= mkey_cache_max_order(dev)) {
+ ent->limit =
+ dev->mdev->profile.mr_cache[order - 2].limit;
+ xa_lock_irq(&ent->mkeys);
+ queue_adjust_cache_locked(ent);
+ xa_unlock_irq(&ent->mkeys);
+ }
+ }
+
+ ent = mlx5_odp_init_mkey_cache_entry(dev);
+ if (ent) {
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ mlx5_cache_ent_debugfs_init(dev, ent,
+ MLX5_IMR_KSM_CACHE_ENTRY + 2);
+ }
+
+ return 0;
+}
+
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+{
+ int err;
mutex_init(&dev->slow_path_mutex);
- cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
- if (!cache->wq) {
+ mutex_init(&dev->cache.rb_lock);
+ dev->cache.rb_root = RB_ROOT;
+ dev->cache.wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
+ if (!dev->cache.wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
return -ENOMEM;
}
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
- ent->order = i + 2;
- ent->dev = dev;
- ent->limit = 0;
-
- INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-
- if (i > MKEY_CACHE_LAST_STD_ENTRY) {
- mlx5_odp_init_mkey_cache_entry(ent);
- continue;
- }
-
- if (ent->order > mkey_cache_max_order(dev))
- continue;
-
- rb_key.ndescs = 1 << ent->order;
- ent->rb_key = rb_key;
- if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
- !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
- mlx5r_umr_can_load_pas(dev, 0))
- ent->limit = dev->mdev->profile.mr_cache[i].limit;
- else
- ent->limit = 0;
- xa_lock_irq(&ent->mkeys);
- queue_adjust_cache_locked(ent);
- xa_unlock_irq(&ent->mkeys);
- }
-
- mlx5_mkey_cache_debugfs_init(dev);
+ err = mlx5_cache_init_default_entries(dev);
+ if (err)
+ goto err;
return 0;
+err:
+ mlx5_mkey_cache_cleanup(dev);
+ return err;
}
int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
- unsigned int i;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
if (!dev->cache.wq)
return 0;
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- struct mlx5_cache_ent *ent = &dev->cache.ent[i];
-
+ mutex_lock(&dev->cache.rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
ent->disabled = true;
xa_unlock_irq(&ent->mkeys);
@@ -870,8 +982,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
- clean_keys(dev, i);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
destroy_workqueue(dev->cache.wq);
del_timer_sync(&dev->delay_timer);
@@ -1587,16 +1587,17 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
return err;
}
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
{
struct mlx5r_cache_rb_key rb_key = {
.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
.ndescs = mlx5_imr_ksm_entries
};
- if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return;
- ent->rb_key = rb_key;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return NULL;
+
+ return mlx5r_cache_create_ent(dev, rb_key);
}
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {