@@ -42,6 +42,66 @@
#include "uverbs.h"
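+/*
+ * Pin and DMA-map a peer memory region through the peer memory client that
+ * claimed it. On failure, drop the peer client reference, free the umem and
+ * return an ERR_PTR.
+ */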
+static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
+ struct ib_umem *umem, unsigned long addr,
+ int dmasync)
+{
+ int ret;
+ const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
+
+ umem->ib_peer_mem = ib_peer_mem;
+ /*
+ * We always request write permissions to the pages, to force breaking of any CoW
+ * during the registration of the MR. For read-only MRs we use the "force" flag to
+ * indicate that CoW breaking is required but the registration should not fail if
+ * referencing read-only areas.
+ */
+ ret = peer_mem->get_pages(addr, umem->length,
+ 1, !umem->writable,
+ &umem->sg_head,
+ umem->peer_mem_client_context,
+ 0);
+ if (ret)
+ goto out;
+
+ umem->page_size = peer_mem->get_page_size
+ (umem->peer_mem_client_context);
+ if (umem->page_size <= 0) {
+ ret = -EINVAL;
+ goto put_pages;
+ }
+
+ umem->offset = addr & ((unsigned long)umem->page_size - 1);
+ ret = peer_mem->dma_map(&umem->sg_head,
+ umem->peer_mem_client_context,
+ umem->context->device->dma_device,
+ dmasync,
+ &umem->nmap);
+ if (ret)
+ goto put_pages;
+
+ return umem;
+
+put_pages:
+ peer_mem->put_pages(&umem->sg_head,
+ umem->peer_mem_client_context);
+out:
+ ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+
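+/*
+ * Undo peer_umem_get(): DMA-unmap and unpin the pages, drop the peer
+ * client reference and free the umem.
+ */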
+static void peer_umem_release(struct ib_umem *umem)
+{
+ const struct peer_memory_client *peer_mem =
+ umem->ib_peer_mem->peer_mem;
+
+ peer_mem->dma_unmap(&umem->sg_head,
+ umem->peer_mem_client_context,
+ umem->context->device->dma_device);
+ peer_mem->put_pages(&umem->sg_head,
+ umem->peer_mem_client_context);
+ ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
+ kfree(umem);
+}
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
@@ -74,9 +134,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
+ * @peer_mem_flags: IB_PEER_MEM_xxx flags for the memory being pinned
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access, int dmasync)
+ size_t size, int access, int dmasync,
+ unsigned long peer_mem_flags)
{
struct ib_umem *umem;
struct page **page_list;
@@ -114,6 +176,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
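+ /*
+ * If the caller allows it, let a registered peer memory client claim
+ * this address range; if one does, pin and map the pages through that
+ * client instead of get_user_pages().
+ */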
+ if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
+ struct ib_peer_memory_client *peer_mem_client;
+
+ peer_mem_client = ib_get_peer_client(context, addr, size,
+ &umem->peer_mem_client_context);
+ if (peer_mem_client)
+ return peer_umem_get(peer_mem_client, umem, addr,
+ dmasync);
+ }
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
@@ -234,6 +305,10 @@ void ib_umem_release(struct ib_umem *umem)
struct mm_struct *mm;
struct task_struct *task;
unsigned long diff;
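+ /* Peer-memory umems are released via the peer memory client */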
+ if (umem->ib_peer_mem) {
+ peer_umem_release(umem);
+ return;
+ }
__ib_umem_release(umem->context->device, umem, 1);
@@ -444,7 +444,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
- c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0, 0);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
@@ -635,7 +635,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
@@ -705,7 +705,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
@@ -359,7 +359,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
+ mr_access_flags, 0, 0);
if (IS_ERR(e_mr->umem)) {
ib_mr = (void *)e_mr->umem;
goto reg_user_mr_exit1;
@@ -198,7 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
+ mr_access_flags, 0, 0);
if (IS_ERR(umem))
return (void *) umem;
@@ -142,7 +142,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
int cqe_size = dev->dev->caps.cqe_size;
*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE, 1, IB_PEER_MEM_ALLOW);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -62,7 +62,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0, 0);
+ PAGE_SIZE, 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
@@ -147,7 +147,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
/* Force registering the memory as writable. */
/* Used for memory re-registeration. HCA protects the access */
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+ access_flags | IB_ACCESS_LOCAL_WRITE, 0,
+ IB_PEER_MEM_ALLOW);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -226,12 +227,18 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
int err;
int n;
+ /* Memory re-registration is not supported for peer memory */
+ if (mmr->umem->ib_peer_mem) {
+ err = -EOPNOTSUPP;
+ goto release_mpt_entry;
+ }
+
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
mmr->umem = ib_umem_get(mr->uobject->context, start, length,
mr_access_flags |
IB_ACCESS_LOCAL_WRITE,
- 0);
+ 0, 0);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
@@ -721,7 +721,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
+ qp->buf_size, 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -114,7 +114,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- buf_size, 0, 0);
+ buf_size, 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
@@ -628,7 +628,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE, 1,
+ IB_PEER_MEM_ALLOW);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
@@ -958,7 +959,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return -EINVAL;
umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE, 1, IB_PEER_MEM_ALLOW);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
return err;
@@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0, 0);
+ PAGE_SIZE, 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
@@ -884,7 +884,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
start, virt_addr, length);
umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
- 0);
+ 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(umem)) {
mlx5_ib_dbg(dev, "umem get failed\n");
return (void *)umem;
@@ -584,7 +584,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd.buf_addr && qp->buf_size) {
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
+ qp->buf_size, 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(qp->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(qp->umem);
@@ -103,7 +103,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
- 0, 0);
+ 0, 0, IB_PEER_MEM_ALLOW);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
@@ -1002,7 +1002,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
- ucmd.mr_attrs & MTHCA_MR_DMASYNC);
+ ucmd.mr_attrs & MTHCA_MR_DMASYNC, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
@@ -2333,7 +2333,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u8 stag_key;
int first_page = 1;
- region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ region = ib_umem_get(pd->uobject->context, start, length, acc, 0, 0);
if (IS_ERR(region)) {
return (struct ib_mr *)region;
}
@@ -794,7 +794,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(status);
- mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+ mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0, 0);
if (IS_ERR(mr->umem)) {
status = -EFAULT;
goto umem_err;
@@ -242,7 +242,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
+ mr_access_flags, 0, 0);
if (IS_ERR(umem))
return (void *) umem;
@@ -13,6 +13,10 @@ struct ib_peer_memory_client {
struct completion unload_comp;
};
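+/* Flags for the peer_mem_flags argument of ib_umem_get() */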
+enum ib_peer_mem_flags {
+ IB_PEER_MEM_ALLOW = (1 << 0),
+};
+
struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
size_t size, void **peer_client_context);
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <rdma/ib_peer_mem.h>
struct ib_ucontext;
@@ -53,12 +54,17 @@ struct ib_umem {
struct sg_table sg_head;
int nmap;
int npages;
+ /* peer memory client that manages this umem, if any */
+ struct ib_peer_memory_client *ib_peer_mem;
+ /* peer memory client's private context for this umem */
+ void *peer_mem_client_context;
};
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access, int dmasync);
+ size_t size, int access, int dmasync,
+ unsigned long peer_mem_flags);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
@@ -67,8 +73,9 @@ int ib_umem_page_count(struct ib_umem *umem);
#include <linux/err.h>
static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
- unsigned long addr, size_t size,
- int access, int dmasync) {
+ unsigned long addr, size_t size,
+ int access, int dmasync,
+ unsigned long peer_mem_flags) {
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }