@@ -3490,6 +3490,7 @@ static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev)
if (ret)
return ret;
+ INIT_LIST_HEAD(&dev->data_direct_mr_list);
ret = mlx5_data_direct_ib_reg(dev, vuid);
if (ret)
mlx5_ib_free_data_direct_resources(dev);
@@ -3882,6 +3883,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
dump_fill_mkey),
UA_MANDATORY));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_reg_dmabuf_mr,
+ UVERBS_OBJECT_MR,
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum mlx5_ib_uapi_reg_dmabuf_flags,
+ UA_OPTIONAL));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
@@ -3891,6 +3900,7 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
@@ -4396,6 +4406,7 @@ void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev)
{
mutex_lock(&ibdev->data_direct_lock);
+ mlx5_ib_revoke_data_direct_mrs(ibdev);
ibdev->data_direct_dev = NULL;
mutex_unlock(&ibdev->data_direct_lock);
}
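A minimal sketch of the serialization this unbind path relies on (illustrative only, not driver code): data-direct registration and unbind both run under data_direct_lock, so once unbind has revoked the listed MRs and cleared data_direct_dev, a racing registration observes the NULL pointer and fails rather than creating an MR against a detached device.

/* Illustrative only -- models the locking pattern, not the mlx5 code. */
#include <pthread.h>
#include <errno.h>
#include <stddef.h>

struct dd_state {
	pthread_mutex_t lock;   /* plays the role of data_direct_lock */
	void *data_direct_dev;  /* NULL once the device is unbound    */
};

static int dd_register(struct dd_state *s)
{
	int ret = 0;

	pthread_mutex_lock(&s->lock);
	if (!s->data_direct_dev)        /* unbind already ran: refuse */
		ret = -EINVAL;
	/* ...otherwise build the crossed + crossing MRs under the lock... */
	pthread_mutex_unlock(&s->lock);
	return ret;
}

static void dd_unbind(struct dd_state *s)
{
	pthread_mutex_lock(&s->lock);
	/* revoke every MR on data_direct_mr_list first, then drop the device */
	s->data_direct_dev = NULL;
	pthread_mutex_unlock(&s->lock);
}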
@@ -682,6 +682,8 @@ struct mlx5_ib_mr {
struct mlx5_ib_mkey mmkey;
struct ib_umem *umem;
+ /* The MR is used as part of a data direct flow */
+ u8 data_direct :1;
union {
/* Used only by kernel MRs (umem == NULL) */
@@ -719,6 +721,10 @@ struct mlx5_ib_mr {
} odp_destroy;
struct ib_odp_counters odp_stats;
bool is_odp_implicit;
+ /* The affiliated data direct crossed MR */
+ struct mlx5_ib_mr *dd_crossed_mr;
+ struct list_head dd_node;
+ u8 revoked :1;
};
};
};
@@ -1169,6 +1175,7 @@ struct mlx5_ib_dev {
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct list_head data_direct_mr_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
@@ -1437,6 +1444,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
struct mlx5_data_direct_dev *dev);
void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
@@ -43,6 +43,7 @@
#include "dm.h"
#include "mlx5_ib.h"
#include "umr.h"
+#include "data_direct.h"
enum {
MAX_PENDING_REG_MR = 8,
@@ -54,7 +55,9 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate);
+ unsigned int page_size, bool populate,
+ int access_mode);
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -1126,12 +1129,10 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
- int access_flags)
+ int access_flags, int access_mode)
{
- struct mlx5r_cache_rb_key rb_key = {
- .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
- };
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
unsigned int page_size;
@@ -1144,6 +1145,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
+ rb_key.access_mode = access_mode;
rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
@@ -1154,7 +1156,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
*/
if (!ent) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode);
mutex_unlock(&dev->slow_path_mutex);
if (IS_ERR(mr))
return mr;
@@ -1175,13 +1177,71 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
+static struct ib_mr *
+reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags,
+ u32 crossed_lkey)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_1;
+ }
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, crossing_target_vhca_id,
+ MLX5_CAP_GEN(dev->mdev, vhca_id));
+ MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+
+ /* For a crossing mkey the start address must be 0 and the length must cover iova + length */
+ set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd);
+ MLX5_SET64(mkc, mkc, len, iova + length);
+
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_2;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ set_mr_fields(dev, mr, length, access_flags, iova);
+ mr->ibmr.pd = pd;
+ kvfree(in);
+ mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key);
+
+ return &mr->ibmr;
+err_2:
+ kvfree(in);
+err_1:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
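The two access_mode_* fields above are a split encoding of one mode value: access_mode_1_0 carries the low two bits and access_mode_4_2 the next three. A worked example; the concrete value 0x6 for MLX5_MKC_ACCESS_MODE_CROSSING is assumed here purely for illustration, the split logic is what matters:

/*
 * Assumed mode value 0x6:
 *   access_mode_1_0 = 0x6 & 0x3        = 0x2
 *   access_mode_4_2 = (0x6 >> 2) & 0x7 = 0x1
 * HW reassembles the mode as (access_mode_4_2 << 2) | access_mode_1_0 = 0x6.
 */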
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate)
+ unsigned int page_size, bool populate,
+ int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1190,7 +1250,9 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
int inlen;
u32 *in;
int err;
- bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
+ bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
+ (access_mode == MLX5_MKC_ACCESS_MODE_MTT);
+ bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
if (!page_size)
return ERR_PTR(-EINVAL);
@@ -1213,7 +1275,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
if (populate) {
- if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) {
err = -EINVAL;
goto err_2;
}
@@ -1229,14 +1291,22 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
populate ? pd : dev->umrc.pd);
+ /* In the data direct flow, overwrite the pdn field with the data direct internal kernel PD */
+ if (umem->is_dmabuf && ksm_mode)
+ MLX5_SET(mkc, mkc, pd, dev->ddr.pdn);
+
MLX5_SET(mkc, mkc, free, !populate);
- MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
- MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(iova, umem->length, mr->page_shift));
+ if (ksm_mode)
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift) * 2);
+ else
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift));
MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (mlx5_umem_needs_ats(dev, umem, access_flags))
MLX5_SET(mkc, mkc, ma_translation_mode, 1);
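The doubling of translations_octword_size in the ksm_mode branch follows from the descriptor sizes; a short worked example, assuming the usual 8-byte MTT and 16-byte KSM entries from the mlx5 headers:

/*
 * One translation octword is 16 bytes:
 *   MTT entry  =  8 bytes -> 2 entries per octword (what get_octo_len() counts)
 *   KSM entry  = 16 bytes -> 1 entry  per octword
 * Describing the same number of DMA blocks with KSM therefore needs twice as
 * many octowords, hence the "* 2" above.
 */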
@@ -1373,13 +1443,15 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT);
} else {
unsigned int page_size = mlx5_umem_find_best_pgsz(
umem, mkc, log_page_size, 0, iova);
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(pd, umem, iova, access_flags, page_size, true);
+ mr = reg_create(pd, umem, iova, access_flags, page_size,
+ true, MLX5_MKC_ACCESS_MODE_MTT);
mutex_unlock(&dev->slow_path_mutex);
}
if (IS_ERR(mr)) {
@@ -1442,7 +1514,8 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(odp))
return ERR_CAST(odp);
- mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT);
if (IS_ERR(mr)) {
ib_umem_release(&odp->umem);
return ERR_CAST(mr);
@@ -1510,35 +1583,31 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
.move_notify = mlx5_ib_dmabuf_invalidate_cb,
};
-struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
- u64 length, u64 virt_addr,
- int fd, int access_flags,
- struct uverbs_attr_bundle *attrs)
+static struct ib_mr *
+reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
+ u64 offset, u64 length, u64 virt_addr,
+ int fd, int access_flags, int access_mode)
{
+ bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
struct ib_umem_dmabuf *umem_dmabuf;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
- !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev,
- "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
- offset, virt_addr, length, fd, access_flags);
-
err = mlx5r_umr_resource_init(dev);
if (err)
return ERR_PTR(err);
- /* dmabuf requires xlt update via umr to work. */
- if (!mlx5r_umr_can_load_pas(dev, length))
- return ERR_PTR(-EINVAL);
+ if (!pinned_mode)
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev,
+ offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ else
+ umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
+ dma_device, offset, length,
+ fd, access_flags);
- umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
- access_flags,
- &mlx5_ib_dmabuf_attach_ops);
if (IS_ERR(umem_dmabuf)) {
mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
PTR_ERR(umem_dmabuf));
@@ -1546,7 +1615,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
}
mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
- access_flags);
+ access_flags, access_mode);
if (IS_ERR(mr)) {
ib_umem_release(&umem_dmabuf->umem);
return ERR_CAST(mr);
@@ -1556,9 +1625,13 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
umem_dmabuf->private = mr;
- err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
- if (err)
- goto err_dereg_mr;
+ if (!pinned_mode) {
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+ } else {
+ mr->data_direct = true;
+ }
err = mlx5_ib_init_dmabuf_mr(mr);
if (err)
@@ -1566,10 +1639,101 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return &mr->ibmr;
err_dereg_mr:
- mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ __mlx5_ib_dereg_mr(&mr->ibmr);
return ERR_PTR(err);
}
+static struct ib_mr *
+reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct ib_mr *crossing_mr;
+ struct ib_mr *crossed_mr;
+ int ret = 0;
+
+ /* Per HW behaviour, the IOVA must be page aligned in KSM mode */
+ if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* The device's 'data direct mkey' was created without RO flags to
+ * simplify things and allow a single mkey per device.
+ * Since RO is not mandatory, mask it out accordingly.
+ */
+ access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
+ crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
+ offset, length, virt_addr, fd,
+ access_flags, MLX5_MKC_ACCESS_MODE_KSM);
+ if (IS_ERR(crossed_mr)) {
+ ret = PTR_ERR(crossed_mr);
+ goto end;
+ }
+
+ mutex_lock(&dev->slow_path_mutex);
+ crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags,
+ crossed_mr->lkey);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(crossing_mr)) {
+ __mlx5_ib_dereg_mr(crossed_mr);
+ ret = PTR_ERR(crossing_mr);
+ goto end;
+ }
+
+ list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list);
+ to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr);
+ to_mmr(crossing_mr)->data_direct = true;
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret ? ERR_PTR(ret) : crossing_mr;
+}
+
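The resulting object graph for a data-direct registration, sketched as a comment (not driver code): the MR handed back to userspace is the crossing mkey, which redirects through crossed_lkey to a KSM mkey whose pages are DMA mapped against the data-direct PCI function.

/*
 * user ib_mr
 *   -> crossing mkey (MLX5_MKC_ACCESS_MODE_CROSSING, user PD, IB device)
 *        |  crossed_lkey
 *        v
 *      crossed KSM mkey (MLX5_MKC_ACCESS_MODE_KSM, internal PD dev->ddr.pdn,
 *                        dmabuf pinned and DMA mapped for the data-direct
 *                        PCI device)
 *        |
 *        v
 *      dmabuf pages
 *
 * Teardown destroys the crossing mkey first and then the crossed one, see
 * dereg_crossing_data_direct_mr().
 */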
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int mlx5_access_flags = 0;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
+ err = uverbs_get_flags32(&mlx5_access_flags, attrs,
+ MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags, mlx5_access_flags);
+
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT)
+ return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr,
+ fd, access_flags);
+
+ return reg_user_mr_dmabuf(pd, pd->device->dma_device,
+ offset, length, virt_addr,
+ fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT);
+}
+
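For completeness, a userspace sketch of how the new optional attribute could be exercised. The mlx5dv_reg_dmabuf_mr() wrapper and the MLX5DV_REG_DMABUF_ACCESS_DATA_DIRECT flag are assumptions about a matching rdma-core build and are not part of this kernel patch; plain ibv_reg_dmabuf_mr() has no way to carry the vendor flag.

/* Userspace sketch -- wrapper name, signature and flag are assumed from a
 * matching rdma-core; check your provider headers. */
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

static struct ibv_mr *reg_data_direct_mr(struct ibv_pd *pd, int dmabuf_fd,
					 size_t length, uint64_t iova)
{
	/* The last argument is forwarded to the kernel as
	 * MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS. */
	return mlx5dv_reg_dmabuf_mr(pd, 0 /* offset */, length, iova,
				    dmabuf_fd,
				    IBV_ACCESS_LOCAL_WRITE |
				    IBV_ACCESS_REMOTE_READ |
				    IBV_ACCESS_REMOTE_WRITE,
				    MLX5DV_REG_DMABUF_ACCESS_DATA_DIRECT);
}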
/*
* True if the change in access flags can be done via UMR, only some access
* flags can be updated.
@@ -1665,7 +1829,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct)
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(
@@ -1793,7 +1957,7 @@ mlx5_alloc_priv_descs(struct ib_device *device,
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (!mr->umem && mr->descs) {
+ if (!mr->umem && !mr->data_direct && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1847,6 +2011,34 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
return ret;
}
+static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ int err;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+ mr->revoked = true;
+ err = mlx5r_umr_revoke_mr(mr);
+ if (WARN_ON(err))
+ return err;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+ return 0;
+}
+
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_mr *mr, *next;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+
+ list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) {
+ list_del(&mr->dd_node);
+ mlx5_ib_revoke_data_direct_mr(mr);
+ }
+}
+
static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1864,7 +2056,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
return destroy_mkey(dev, mr);
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
@@ -1931,6 +2123,36 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return 0;
}
+static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr;
+ int ret;
+
+ ret = __mlx5_ib_dereg_mr(&mr->ibmr);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dev->data_direct_lock);
+ if (!dd_crossed_mr->revoked)
+ list_del(&dd_crossed_mr->dd_node);
+
+ ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr);
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+
+ if (mr->data_direct)
+ return dereg_crossing_data_direct_mr(dev, mr);
+
+ return __mlx5_ib_dereg_mr(ibmr);
+}
+
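The revoked bit closes the race between device unbind and user deregistration; both sides serialize on data_direct_lock, so exactly one of them unlinks the crossed MR from data_direct_mr_list. A sketch of the two orderings this code has to handle:

/*
 * a) dereg first:  dereg_crossing_data_direct_mr() takes data_direct_lock,
 *                  sees !revoked, does list_del() and destroys both mkeys.
 * b) unbind first: mlx5_ib_revoke_data_direct_mrs() (same lock) does
 *                  list_del(), sets revoked and revokes the mkey and the
 *                  dmabuf mapping; the later dereg sees revoked, skips
 *                  list_del() and only frees the remaining resources.
 */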
static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
int access_mode, int page_shift)
{
@@ -710,7 +710,10 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
- err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
+ if (mr->data_direct)
+ err = mlx5r_umr_update_data_direct_ksm_pas(mr, xlt_flags);
+ else
+ err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
@@ -632,44 +632,47 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
-/*
- * Send the DMA list to the HW for a normal MR using UMR.
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
- * flag may be used.
- */
-int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+static int
+_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
+ size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
+ struct mlx5_ksm *cur_ksm;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
- struct mlx5_mtt *mtt;
size_t final_size;
+ void *curr_entry;
struct ib_sge sg;
+ void *entry;
u64 offset = 0;
int err = 0;
- if (WARN_ON(mr->umem->is_odp))
- return -EINVAL;
-
- mtt = mlx5r_umr_create_xlt(
- dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
- sizeof(*mtt), flags);
- if (!mtt)
+ entry = mlx5r_umr_create_xlt(dev, &sg,
+ ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
+ ent_size, flags);
+ if (!entry)
return -ENOMEM;
orig_sg_length = sg.length;
-
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
+ if (dd) {
+ /* Use the data direct internal kernel PD */
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
+ cur_ksm = entry;
+ } else {
+ cur_mtt = entry;
+ }
+
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
- cur_mtt = mtt;
+ curr_entry = entry;
rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
- if (cur_mtt == (void *)mtt + sg.length) {
+ if (curr_entry == entry + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
@@ -681,23 +684,31 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
-
- cur_mtt = mtt;
+ if (dd)
+ cur_ksm = entry;
+ else
+ cur_mtt = entry;
}
- cur_mtt->ptag =
- cpu_to_be64(rdma_block_iter_dma_address(&biter) |
- MLX5_IB_MTT_PRESENT);
-
- if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
- cur_mtt->ptag = 0;
-
- cur_mtt++;
+ if (dd) {
+ cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
+ cur_ksm++;
+ curr_entry = cur_ksm;
+ } else {
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+ cur_mtt++;
+ curr_entry = cur_mtt;
+ }
}
- final_size = (void *)cur_mtt - (void *)mtt;
+ final_size = curr_entry - entry;
sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
- memset(cur_mtt, 0, sg.length - final_size);
+ memset(curr_entry, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
@@ -705,10 +716,32 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
err:
sg.length = orig_sg_length;
- mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
+ mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
return err;
}
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ /* No invalidation flow is expected */
+ if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, true);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, false);
+}
+
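The ent_size selection at the top of _mlx5r_umr_update_mr_pas() comes down to the two descriptor layouts; a sketch of what each entry carries (layouts assumed from include/linux/mlx5/qp.h, the va/key assignments match the loop above):

/*
 * struct mlx5_mtt {            struct mlx5_ksm {
 *         __be64 ptag;                 __be32 reserved;
 * };  8 bytes                          __be32 key;
 *                                      __be64 va;
 *                              };  16 bytes
 *
 * An MTT entry is a physical address tagged with MLX5_IB_MTT_PRESENT; a KSM
 * entry instead points into the device's data-direct mkey (dev->ddr.mkey) at
 * the block's DMA address, which is why the data-direct PD is also forced
 * into the mkey segment above.
 */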
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
@@ -95,6 +95,7 @@ int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
@@ -274,6 +274,10 @@ enum mlx5_ib_create_cq_attrs {
MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
};
+enum mlx5_ib_reg_dmabuf_mr_attrs {
+ MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
+
#define MLX5_IB_DW_MATCH_PARAM 0xA0
struct mlx5_ib_match_params {
@@ -54,6 +54,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type {
MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
};
+enum mlx5_ib_uapi_reg_dmabuf_flags {
+ MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0,
+};
+
struct mlx5_ib_uapi_devx_async_cmd_hdr {
__aligned_u64 wr_id;
__u8 out_data[];