@@ -116,6 +116,7 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+void rxe_mw_cleanup(struct rxe_pool_entry *arg);
/* rxe_net.c */
void rxe_loopback(struct sk_buff *skb);
@@ -9,21 +9,21 @@
#include "rxe.h"
#include "rxe_loc.h"
-/*
- * lfsr (linear feedback shift register) with period 255
+/* choose a unique non zero random number for lkey
+ * use high order bit to indicate MR vs MW
+ *
+ * Each pass draws a random 32-bit value, clears the IS_MW bit and
+ * hands the result to rxe_add_key(). A value of zero, or a non-zero
+ * return from rxe_add_key(), causes another pass. The loop condition
+ * (tries++ < 10) is tested after the body, so up to 11 passes are made.
+ *
+ * NOTE(review): if every pass fails the function only logs through
+ * pr_err() and returns void, so the caller cannot tell that no key
+ * was inserted - consider returning an error code; TODO confirm.
*/
-static u8 rxe_get_key(void)
+static void rxe_set_mr_lkey(struct rxe_mr *mr)
{
- static u32 key = 1;
-
- key = key << 1;
-
- key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
- ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
-
- key &= 0xff;
-
- return key;
+ u32 lkey;
+ int tries = 0;
+
+ do {
+ get_random_bytes(&lkey, sizeof(lkey));
+ lkey &= ~IS_MW;
+ if (likely(lkey && (rxe_add_key(mr, &lkey) == 0)))
+ return;
+ } while (tries++ < 10);
+ pr_err("unable to get random lkey for mr\n");
}
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
@@ -51,36 +51,19 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->pelem.index << 8 | rxe_get_key();
- u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
-
- if (mr->pelem.pool->type == RXE_TYPE_MR) {
- mr->ibmr.lkey = lkey;
- mr->ibmr.rkey = rkey;
- }
-
- mr->lkey = lkey;
- mr->rkey = rkey;
+ rxe_add_index(mr);
+ rxe_set_mr_lkey(mr);
+ if (access & IB_ACCESS_REMOTE)
+ mr->ibmr.rkey = mr->ibmr.lkey;
+
+ /* TODO should not have two copies of lkey and rkey in mr
+  *
+  * ibmr.lkey is the MR pool's key field (key_offset in
+  * rxe_type_info), so it is written by rxe_add_key() when
+  * rxe_set_mr_lkey() succeeds. ibmr.rkey is only assigned when
+  * remote access is requested; otherwise it keeps its previous
+  * value (presumably zero from allocation - TODO confirm).
+  */
+ mr->lkey = mr->ibmr.lkey;
+ mr->rkey = mr->ibmr.rkey;
mr->state = RXE_MEM_STATE_INVALID;
mr->type = RXE_MR_TYPE_NONE;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-void rxe_mr_cleanup(struct rxe_pool_entry *arg)
-{
- struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
- int i;
-
- ib_umem_release(mr->umem);
-
- if (mr->map) {
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
-
- kfree(mr->map);
- }
-}
-
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
int i;
@@ -543,9 +526,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
{
struct rxe_mr *mr;
struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
- int index = key >> 8;
- mr = rxe_pool_get_index(&rxe->mr_pool, index);
+ mr = rxe_pool_get_key(&rxe->mr_pool, &key);
if (!mr)
return NULL;
@@ -560,3 +542,21 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
return mr;
}
+
+/* pool cleanup callback for MRs: release the umem, free every map
+ * buffer and the map table itself, then drop the MR's pool index
+ * and key
+ */
+void rxe_mr_cleanup(struct rxe_pool_entry *arg)
+{
+	struct rxe_mr *mr = container_of(arg, struct rxe_mr, pelem);
+	int idx = 0;
+
+	ib_umem_release(mr->umem);
+
+	if (mr->map) {
+		/* free the per-map buffers before the table that holds them */
+		while (idx < mr->num_map)
+			kfree(mr->map[idx++]);
+
+		kfree(mr->map);
+	}
+
+	rxe_drop_index(mr);
+	rxe_drop_key(mr);
+}
@@ -8,6 +8,24 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* Pick a random, unique rkey for a memory window.
+ *
+ * The IS_MW (high order) bit is always set so that an MW key can be
+ * told apart from an MR key, and the remaining bits must not all be
+ * zero. A candidate that fails that test, or that rxe_add_key() does
+ * not accept, is thrown away and a new one is drawn, for at most 11
+ * attempts in total.
+ *
+ * NOTE(review): failure is only logged; nothing is reported back to
+ * the caller, which cannot tell that no rkey was assigned.
+ */
+static void rxe_set_mw_rkey(struct rxe_mw *mw)
+{
+	u32 candidate;
+	int attempt;
+
+	for (attempt = 0; attempt <= 10; attempt++) {
+		get_random_bytes(&candidate, sizeof(candidate));
+		candidate |= IS_MW;
+
+		/* the bits below the MW flag must not all be zero */
+		if (!(candidate & ~IS_MW))
+			continue;
+
+		if (likely(rxe_add_key(mw, &candidate) == 0))
+			return;
+	}
+
+	pr_err("unable to get random rkey for mw\n");
+}
+
/* this temporary code to test ibv_alloc_mw, ibv_dealloc_mw */
struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata)
@@ -16,6 +34,13 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_mw *mw;
u32 rkey;
+ struct rxe_alloc_mw_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp))
+ return ERR_PTR(-EINVAL);
+ uresp = udata->outbuf;
+ }
if (unlikely((type != IB_MW_TYPE_1) &&
(type != IB_MW_TYPE_2)))
@@ -29,11 +54,8 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
return ERR_PTR(-ENOMEM);
}
- /* pick a random rkey for now */
- get_random_bytes(&rkey, sizeof(rkey));
-
rxe_add_index(mw);
- rxe_add_key(mw, &rkey);
+ rxe_set_mw_rkey(mw);
spin_lock_init(&mw->lock);
mw->qp = NULL;
@@ -47,6 +69,15 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
RXE_MEM_STATE_FREE :
RXE_MEM_STATE_VALID;
+ if (uresp) {
+ if (copy_to_user(&uresp->index, &mw->pelem.index,
+ sizeof(uresp->index))) {
+ rxe_drop_ref(mw);
+ rxe_drop_ref(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
return &mw->ibmw;
}
@@ -61,8 +92,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
spin_unlock_irqrestore(&mw->lock, flags);
rxe_drop_ref(pd);
- rxe_drop_index(mw);
- rxe_drop_key(mw);
rxe_drop_ref(mw);
return 0;
@@ -73,3 +102,11 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
pr_err_once("%s: not implemented\n", __func__);
return -EINVAL;
}
+
+/* pool cleanup callback for MWs: drop the pool index and the key
+ * held by the memory window
+ */
+void rxe_mw_cleanup(struct rxe_pool_entry *arg)
+{
+	struct rxe_mw *mw;
+
+	mw = container_of(arg, struct rxe_mw, pelem);
+
+	rxe_drop_index(mw);
+	rxe_drop_key(mw);
+}
@@ -9,15 +9,12 @@
#include "rxe.h"
#include "rxe_loc.h"
-/* info about object pools
- * note that mr and mw share a single index space
- * so that one can map an lkey to the correct type of object
- */
+/* info about object pools */
struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
.size = sizeof(struct rxe_ucontext),
- .flags = RXE_POOL_NO_ALLOC,
+ .flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_PD] = {
.name = "rxe-pd",
@@ -50,24 +47,30 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_CQ] = {
.name = "rxe-cq",
.size = sizeof(struct rxe_cq),
- .flags = RXE_POOL_NO_ALLOC,
+ .flags = RXE_POOL_NO_ALLOC,
.cleanup = rxe_cq_cleanup,
},
[RXE_TYPE_MR] = {
.name = "rxe-mr",
.size = sizeof(struct rxe_mr),
.cleanup = rxe_mr_cleanup,
- .flags = RXE_POOL_INDEX,
+ .flags = RXE_POOL_INDEX
+ | RXE_POOL_KEY,
.max_index = RXE_MAX_MR_INDEX,
.min_index = RXE_MIN_MR_INDEX,
+ .key_offset = offsetof(struct rxe_mr, ibmr.lkey),
+ .key_size = sizeof(u32),
},
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mw),
+ .cleanup = rxe_mw_cleanup,
.flags = RXE_POOL_INDEX
| RXE_POOL_KEY,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
+ .key_offset = offsetof(struct rxe_mw, ibmw.rkey),
+ .key_size = sizeof(u32),
},
[RXE_TYPE_MC_GRP] = {
.name = "rxe-mc_grp",
@@ -298,7 +301,7 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
return;
}
-static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
struct rb_node **link = &pool->key.tree.rb_node;
struct rb_node *parent = NULL;
@@ -314,7 +317,7 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
if (cmp == 0) {
pr_warn("key already exists!\n");
- goto out;
+ return -EAGAIN;
}
if (cmp > 0)
@@ -325,20 +328,23 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
rb_link_node(&new->key_node, parent, link);
rb_insert_color(&new->key_node, &pool->key.tree);
-out:
- return;
+
+ return 0;
}
-void rxe_add_key(void *arg, void *key)
+int rxe_add_key(void *arg, void *key)
{
+ int ret; /* result of insert_key(); -EAGAIN when the key is a duplicate */
struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
- insert_key(pool, elem);
+ ret = insert_key(pool, elem); /* key stays copied into elem even on failure */
write_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return ret; /* callers retry with a new key on a non-zero result */
}
void rxe_drop_key(void *arg)
@@ -135,7 +135,7 @@ void rxe_drop_index(void *elem);
/* assign a key to a keyed object and insert object into
* pool's rb tree
*/
-void rxe_add_key(void *elem, void *key);
+int rxe_add_key(void *elem, void *key);
/* remove elem from rb tree */
void rxe_drop_key(void *elem);
@@ -601,13 +601,13 @@ int rxe_requester(void *arg)
if (wqe->mask & WR_LOCAL_MASK) {
switch (wqe->wr.opcode) {
case IB_WR_LOCAL_INV:
- mr = rxe_pool_get_index(&rxe->mr_pool,
- wqe->wr.ex.invalidate_rkey >> 8);
+ mr = rxe_pool_get_key(&rxe->mr_pool,
+ &wqe->wr.ex.invalidate_rkey);
if (!mr) {
pr_err("No mr for key %#x\n",
wqe->wr.ex.invalidate_rkey);
wqe->state = wqe_state_error;
- wqe->status = IB_WC_MW_BIND_ERR;
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
/* TODO this should be goto err */
goto exit;
}
@@ -890,8 +890,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
wc->ex.invalidate_rkey = ieth_rkey(pkt);
- rmr = rxe_pool_get_index(&rxe->mr_pool,
- wc->ex.invalidate_rkey >> 8);
+ rmr = rxe_pool_get_key(&rxe->mr_pool,
+ &wc->ex.invalidate_rkey);
if (unlikely(!rmr)) {
pr_err("Bad rkey %#x invalidation\n",
wc->ex.invalidate_rkey);
@@ -870,12 +870,14 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_mr *mr;
+ rxe_add_ref(pd);
+
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr)
+ if (!mr) {
+ rxe_drop_ref(pd);
return ERR_PTR(-ENOMEM);
+ }
- rxe_add_index(mr);
- rxe_add_ref(pd);
rxe_mr_init_dma(pd, access, mr);
return &mr->ibmr;
@@ -891,6 +893,17 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_mr *mr;
+ struct rxe_reg_mr_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ goto err1;
+ }
+ uresp = udata->outbuf;
+ }
+
+ rxe_add_ref(pd);
mr = rxe_alloc(&rxe->mr_pool);
if (!mr) {
@@ -898,22 +911,25 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
goto err2;
}
- rxe_add_index(mr);
-
- rxe_add_ref(pd);
-
err = rxe_mr_init_user(pd, start, length, iova,
- access, udata, mr);
+ access, udata, mr);
if (err)
goto err3;
- return &mr->ibmr;
+ if (uresp) {
+ if (copy_to_user(&uresp->index, &mr->pelem.index,
+ sizeof(uresp->index))) {
+ err = -EFAULT;
+ goto err3;
+ }
+ }
+ return &mr->ibmr;
err3:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
rxe_drop_ref(mr);
err2:
+ rxe_drop_ref(pd);
+err1:
return ERR_PTR(err);
}
@@ -923,7 +939,6 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
mr->state = RXE_MEM_STATE_ZOMBIE;
rxe_drop_ref(mr->pd);
- rxe_drop_index(mr);
rxe_drop_ref(mr);
return 0;
}
@@ -939,16 +954,14 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
if (mr_type != IB_MR_TYPE_MEM_REG)
return ERR_PTR(-EINVAL);
+ rxe_add_ref(pd);
+
mr = rxe_alloc(&rxe->mr_pool);
if (!mr) {
err = -ENOMEM;
goto err1;
}
- rxe_add_index(mr);
-
- rxe_add_ref(pd);
-
err = rxe_mr_init_fast(pd, max_num_sg, mr);
if (err)
goto err2;
@@ -956,10 +969,9 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
return &mr->ibmr;
err2:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
rxe_drop_ref(mr);
err1:
+ rxe_drop_ref(pd);
return ERR_PTR(err);
}
@@ -322,6 +322,9 @@ struct rxe_mr {
struct rxe_map **map;
};
+/* use high order bit to separate MW and MR rkeys
+ *
+ * The constant is unsigned: shifting a signed 1 into bit 31 overflows
+ * int, and the mask is only ever applied to u32 keys.
+ */
+#define IS_MW (1U << 31)
+
struct rxe_mw {
struct rxe_pool_entry pelem;
struct ib_mw ibmw;
@@ -183,4 +183,14 @@ struct rxe_modify_srq_cmd {
__aligned_u64 mmap_info_addr;
};
+struct rxe_reg_mr_resp { /* copied to user space by rxe_reg_user_mr() */
+ __u32 index; /* pool index of the new MR (mr->pelem.index) */
+ __u32 reserved; /* not written by rxe_reg_user_mr(); presumably padding - TODO confirm */
+};
+
+struct rxe_alloc_mw_resp { /* copied to user space by rxe_alloc_mw() */
+ __u32 index; /* pool index of the new MW (mw->pelem.index) */
+ __u32 reserved; /* not written by rxe_alloc_mw(); presumably padding - TODO confirm */
+};
+
#endif /* RDMA_USER_RXE_H */
Give each MR and each MW both a pool index and a key. Add a random key
generator for each type; the high-order bit of the key distinguishes MR
keys (bit clear) from MW keys (bit set). Add responses for ibv_reg_mr and
ibv_alloc_mw that return the assigned index so that other verbs can refer
to the object by index. Change key lookups to use rxe_pool_get_key instead
of rxe_pool_get_index.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |  1 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 80 +++++++++++++--------------
 drivers/infiniband/sw/rxe/rxe_mw.c    | 49 ++++++++++++++--
 drivers/infiniband/sw/rxe/rxe_pool.c  | 32 ++++++-----
 drivers/infiniband/sw/rxe/rxe_pool.h  |  2 +-
 drivers/infiniband/sw/rxe/rxe_req.c   |  6 +-
 drivers/infiniband/sw/rxe/rxe_resp.c  |  4 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c | 48 ++++++++++------
 drivers/infiniband/sw/rxe/rxe_verbs.h |  3 +
 include/uapi/rdma/rdma_user_rxe.h     | 10 ++++
 10 files changed, 152 insertions(+), 83 deletions(-)