diff mbox

[WIP,21/43] mlx5: Allocate a private page list in ib_alloc_mr

Message ID 1437548143-24893-22-git-send-email-sagig@mellanox.com (mailing list archive)
State RFC
Headers show

Commit Message

Sagi Grimberg July 22, 2015, 6:55 a.m. UTC
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 ++++
 drivers/infiniband/hw/mlx5/mr.c      | 45 ++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

Comments

Christoph Hellwig July 22, 2015, 4:46 p.m. UTC | #1
Just curious: what's the tradeoff between allocating the page list
in the core vs duplicating it in all the drivers?  Does the driver
variant give us any benefits?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg July 22, 2015, 4:51 p.m. UTC | #2
On 7/22/2015 7:46 PM, Christoph Hellwig wrote:
> Just curious: what's the tradeoff between allocating the page list
> in the core vs duplicating it in all the drivers?  Does the driver
> variant give us any benefits?

It's not necessarily a page list... (i.e. a real scatterlist).
I it will make more sense in patch 41/43.

Moreover, as I wrote in the cover-letter. I noticed that several
drivers keep shadows anyway for various reasons. For example mlx4
sets the page list with a preset-bit (related to ODP...) so at
registration time we see the loop:

for (i = 0; i < mr->npages; ++i)
         mr->mpl[i] = cpu_to_be64(mr->pl[i] | MLX4_MTT_FLAG_PRESENT);

Given that this not a single example, I'd expect drivers to skip this
duplication (hopefully).

Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haggai Eran July 28, 2015, 10:57 a.m. UTC | #3
Hi Sagi,

On 22/07/2015 09:55, Sagi Grimberg wrote:
> Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 ++++
>  drivers/infiniband/hw/mlx5/mr.c      | 45 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 50 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index c2916f1..df5e959 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -315,6 +315,11 @@ enum mlx5_ib_mtt_access_flags {
>  
>  struct mlx5_ib_mr {
>  	struct ib_mr		ibmr;
> +	u64		        *pl;
> +	__be64			*mpl;
> +	dma_addr_t		pl_map;
Nit: could you choose more descriptive names for these fields? It can be
difficult to understand what they mean just based on the acronym.

> +	int			ndescs;
This one isn't used in this patch, right?

> +	int			max_descs;
>  	struct mlx5_core_mr	mmr;
>  	struct ib_umem	       *umem;
>  	struct mlx5_shared_mr_info	*smr_info;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg July 30, 2015, 8:08 a.m. UTC | #4
On 7/28/2015 1:57 PM, Haggai Eran wrote:
> Hi Sagi,
>
> On 22/07/2015 09:55, Sagi Grimberg wrote:
>> Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
>> ---
>>   drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 ++++
>>   drivers/infiniband/hw/mlx5/mr.c      | 45 ++++++++++++++++++++++++++++++++++++
>>   2 files changed, 50 insertions(+)
>>
>> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> index c2916f1..df5e959 100644
>> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> @@ -315,6 +315,11 @@ enum mlx5_ib_mtt_access_flags {
>>
>>   struct mlx5_ib_mr {
>>   	struct ib_mr		ibmr;
>> +	u64		        *pl;
>> +	__be64			*mpl;
>> +	dma_addr_t		pl_map;
> Nit: could you choose more descriptive names for these fields? It can be
> difficult to understand what they mean just based on the acronym.

OK - I'll name it better in v1.

>
>> +	int			ndescs;
> This one isn't used in this patch, right?

Not in this patch - I can move it.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c2916f1..df5e959 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -315,6 +315,11 @@  enum mlx5_ib_mtt_access_flags {
 
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
+	u64		        *pl;
+	__be64			*mpl;
+	dma_addr_t		pl_map;
+	int			ndescs;
+	int			max_descs;
 	struct mlx5_core_mr	mmr;
 	struct ib_umem	       *umem;
 	struct mlx5_shared_mr_info	*smr_info;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c8de302..1075065 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1167,6 +1167,42 @@  error:
 	return err;
 }
 
+static int
+mlx5_alloc_page_list(struct ib_device *device,
+		     struct mlx5_ib_mr *mr, int ndescs)
+{
+	int size = ndescs * sizeof(u64);
+
+	mr->pl = kcalloc(ndescs, sizeof(u64), GFP_KERNEL);
+	if (!mr->pl)
+		return -ENOMEM;
+
+	mr->mpl = dma_alloc_coherent(device->dma_device, size,
+				     &mr->pl_map, GFP_KERNEL);
+	if (!mr->mpl)
+		goto err;
+
+	return 0;
+err:
+	kfree(mr->pl);
+
+	return -ENOMEM;
+}
+
+static void
+mlx5_free_page_list(struct mlx5_ib_mr *mr)
+{
+	struct ib_device *device = mr->ibmr.device;
+	int size = mr->max_descs * sizeof(u64);
+
+	kfree(mr->pl);
+	if (mr->mpl)
+		dma_free_coherent(device->dma_device, size,
+				  mr->mpl, mr->pl_map);
+	mr->pl = NULL;
+	mr->mpl = NULL;
+}
+
 static int clean_mr(struct mlx5_ib_mr *mr)
 {
 	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1186,6 +1222,8 @@  static int clean_mr(struct mlx5_ib_mr *mr)
 		mr->sig = NULL;
 	}
 
+	mlx5_free_page_list(mr);
+
 	if (!umred) {
 		err = destroy_mkey(dev, mr);
 		if (err) {
@@ -1279,6 +1317,12 @@  struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	if (mr_type == IB_MR_TYPE_FAST_REG) {
 		access_mode = MLX5_ACCESS_MODE_MTT;
 		in->seg.log2_page_size = PAGE_SHIFT;
+
+		err = mlx5_alloc_page_list(pd->device, mr, ndescs);
+		if (err)
+			goto err_free_in;
+
+		mr->max_descs = ndescs;
 	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
 		u32 psv_index[2];
 
@@ -1335,6 +1379,7 @@  err_destroy_psv:
 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
 				     mr->sig->psv_wire.psv_idx);
 	}
+	mlx5_free_page_list(mr);
 err_free_sig:
 	kfree(mr->sig);
 err_free_in: