diff mbox

IB/mlx4: Use vmalloc for WR buffers when needed

Message ID 1443091747-13881-1-git-send-email-wen.gang.wang@oracle.com (mailing list archive)
State Superseded
Headers show

Commit Message

Wengang Wang Sept. 24, 2015, 10:49 a.m. UTC
There are several hits that WR buffer allocation(kmalloc) failed.
It failed at order 3 and/or 4 contigous pages allocation. At the same time
there are actually 100MB+ free memory but well fragmented.
So try vmalloc when kmalloc failed.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 drivers/infiniband/hw/mlx4/qp.c  | 20 ++++++++++++++------
 drivers/infiniband/hw/mlx4/srq.c | 11 ++++++++---
 2 files changed, 22 insertions(+), 9 deletions(-)

Comments

Or Gerlitz Sept. 24, 2015, 11:57 a.m. UTC | #1
On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
> @@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>                 if (err)
>                         goto err_mtt;
>
> -               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
> -               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
> +               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
> +               if (!qp->sq.wrid)
> +                       qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
> +                                               gfp, PAGE_KERNEL);

On other spots of mlx4, we're using vmalloc and not __vmalloc, any
pros/cons for going that way too here?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wengang Wang Sept. 25, 2015, 12:51 a.m. UTC | #2
Hi Or,

? 2015?09?24? 19:57, Or Gerlitz ??:
> On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
>> @@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>>                  if (err)
>>                          goto err_mtt;
>>
>> -               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
>> -               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
>> +               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
>> +               if (!qp->sq.wrid)
>> +                       qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
>> +                                               gfp, PAGE_KERNEL);
> On other spots of mlx4, we're using vmalloc and not __vmalloc, any
> pros/cons for going that way too here?

vmalloc is just using GFP_KERNEL | __GFP_HIGHMEM, we can't pass in the 
flag gfp with it.  We should respect orginal code which needs to pass 
the flag.

thanks,
wengang

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 8, 2015, 5:10 a.m. UTC | #3
On 9/24/2015 1:49 PM, Wengang Wang wrote:
> @@ -1050,8 +1057,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
>   					      &qp->db);
>   		ib_umem_release(qp->umem);
>   	} else {
> -		kfree(qp->sq.wrid);
> -		kfree(qp->rq.wrid);
> +		kvfree(qp->sq.wrid);
> +		kvfree(qp->rq.wrid);
> +
>   		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
>   		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
>   			free_proxy_bufs(&dev->ib_dev, qp);

when you fix issue A you don't conduct cleanup for issue B, remove the 
new line added here andadd

Acked-by: Or Gerlitz <ogerlitz@mellanox.com>

When you re-submit patches makes sure to maintain version, e.g the 
re-post would be [PATCH V2] (I believe
this is going to be the 3rd version, so V0... V1 ... and now V2)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Oct. 8, 2015, 6:06 a.m. UTC | #4
On Fri, Sep 25, 2015 at 08:51:22AM +0800, Wengang Wang wrote:
> Hi Or,
> 
> ? 2015?09?24? 19:57, Or Gerlitz ??:
> >On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang <wen.gang.wang@oracle.com> wrote:
> >>@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
> >>                 if (err)
> >>                         goto err_mtt;
> >>
> >>-               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
> >>-               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
> >>+               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
> >>+               if (!qp->sq.wrid)
> >>+                       qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
> >>+                                               gfp, PAGE_KERNEL);
> >On other spots of mlx4, we're using vmalloc and not __vmalloc, any
> >pros/cons for going that way too here?
> 
> vmalloc is just using GFP_KERNEL | __GFP_HIGHMEM, we can't pass in the flag
> gfp with it.  We should respect orginal code which needs to pass the flag.
Additionally, I want to spot Or's attention on the following discussion
in MM-subsystem about kmalloc/vmalloc and general function to fallback
from one to another.

[1] [PATCH 2/7] mm: introduce kvmalloc and kvmalloc_node
https://lkml.org/lkml/2015/7/7/548
[2] [PATCH 0/7] mm: reliable memory allocation with kvmalloc
https://lkml.org/lkml/2015/7/7/545

> 
> thanks,
> wengang
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 8, 2015, 6:14 a.m. UTC | #5
On 10/8/2015 9:06 AM, Leon Romanovsky wrote:
> Additionally, I want to spot Or's attention on the following discussion
> in MM-subsystem about kmalloc/vmalloc and general function to fallback
> from one to another.
>
>

too busy to read them now, if you have review comments speak now and 
provide them to the author.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Oct. 8, 2015, 6:25 a.m. UTC | #6
On Thu, Oct 8, 2015 at 9:14 AM, Or Gerlitz <ogerlitz@mellanox.com> wrote:
> On 10/8/2015 9:06 AM, Leon Romanovsky wrote:
>>
>> Additionally, I want to spot Or's attention on the following discussion
>> in MM-subsystem about kmalloc/vmalloc and general function to fallback
>> from one to another.
>>
>>
>
> too busy to read them now, if you have review comments speak now and provide
> them to the author.
My comments to author that from my point of view this patch is a
correct to fix current behaviour.

The more general solution (I doubt if it is feasible) is to decrease
the dependency on high order allocations.

>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3..f152d8a 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@ 
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
@@ -786,8 +787,14 @@  static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->sq.wrid)
+			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
+		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->rq.wrid)
+			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -874,8 +881,8 @@  err_wrid:
 		if (qp_has_rq(init_attr))
 			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
 	}
 
 err_mtt:
@@ -1050,8 +1057,9 @@  static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 					      &qp->db);
 		ib_umem_release(qp->umem);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
+
 		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
 		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
 			free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..8d133c4 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@ 
 #include <linux/mlx4/qp.h>
 #include <linux/mlx4/srq.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@  struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
 		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
 		if (!srq->wrid) {
-			err = -ENOMEM;
-			goto err_mtt;
+			srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+					      GFP_KERNEL, PAGE_KERNEL);
+			if (!srq->wrid) {
+				err = -ENOMEM;
+				goto err_mtt;
+			}
 		}
 	}
 
@@ -204,7 +209,7 @@  err_wrid:
 	if (pd->uobject)
 		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
 	else
-		kfree(srq->wrid);
+		kvfree(srq->wrid);
 
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &srq->mtt);