diff mbox

[V3] IB/mlx4: Use vmalloc for WR buffers when needed

Message ID 1444282024-11425-1-git-send-email-wen.gang.wang@oracle.com (mailing list archive)
State Accepted
Headers show

Commit Message

Wengang Wang Oct. 8, 2015, 5:27 a.m. UTC
There are several hits that WR buffer allocation(kmalloc) failed.
It failed at order 3 and/or 4 contigous pages allocation. At the same time
there are actually 100MB+ free memory but well fragmented.
So try vmalloc when kmalloc failed.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/infiniband/hw/mlx4/qp.c  | 19 +++++++++++++------
 drivers/infiniband/hw/mlx4/srq.c | 11 ++++++++---
 2 files changed, 21 insertions(+), 9 deletions(-)

Comments

Matthew Finlay Dec. 11, 2015, 8:28 p.m. UTC | #1
Hi Wengang,

I was going through your patch set here, and it seems that you missed changing kfree to kvfree in mlx4_ib_destroy_srq().  In the current code if the srq wrid is allocated using vmalloc, then on cleanup we will use kfree, which is a bug.

Thanks,
-matt




On 10/7/15, 10:27 PM, "linux-rdma-owner@vger.kernel.org on behalf of Wengang Wang" <linux-rdma-owner@vger.kernel.org on behalf of wen.gang.wang@oracle.com> wrote:

>There are several hits that WR buffer allocation(kmalloc) failed.

>It failed at order 3 and/or 4 contigous pages allocation. At the same time

>there are actually 100MB+ free memory but well fragmented.

>So try vmalloc when kmalloc failed.

>

>Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>

>Acked-by: Or Gerlitz <ogerlitz@mellanox.com>

>---

> drivers/infiniband/hw/mlx4/qp.c  | 19 +++++++++++++------

> drivers/infiniband/hw/mlx4/srq.c | 11 ++++++++---

> 2 files changed, 21 insertions(+), 9 deletions(-)

>

>diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c

>index 4ad9be3..3ccbd3a 100644

>--- a/drivers/infiniband/hw/mlx4/qp.c

>+++ b/drivers/infiniband/hw/mlx4/qp.c

>@@ -34,6 +34,7 @@

> #include <linux/log2.h>

> #include <linux/slab.h>

> #include <linux/netdevice.h>

>+#include <linux/vmalloc.h>

> 

> #include <rdma/ib_cache.h>

> #include <rdma/ib_pack.h>

>@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,

> 		if (err)

> 			goto err_mtt;

> 

>-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);

>-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);

>+		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);

>+		if (!qp->sq.wrid)

>+			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),

>+						gfp, PAGE_KERNEL);

>+		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);

>+		if (!qp->rq.wrid)

>+			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),

>+						gfp, PAGE_KERNEL);

> 		if (!qp->sq.wrid || !qp->rq.wrid) {

> 			err = -ENOMEM;

> 			goto err_wrid;

>@@ -874,8 +881,8 @@ err_wrid:

> 		if (qp_has_rq(init_attr))

> 			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);

> 	} else {

>-		kfree(qp->sq.wrid);

>-		kfree(qp->rq.wrid);

>+		kvfree(qp->sq.wrid);

>+		kvfree(qp->rq.wrid);

> 	}

> 

> err_mtt:

>@@ -1050,8 +1057,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,

> 					      &qp->db);

> 		ib_umem_release(qp->umem);

> 	} else {

>-		kfree(qp->sq.wrid);

>-		kfree(qp->rq.wrid);

>+		kvfree(qp->sq.wrid);

>+		kvfree(qp->rq.wrid);

> 		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |

> 		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))

> 			free_proxy_bufs(&dev->ib_dev, qp);

>diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c

>index dce5dfe..8d133c4 100644

>--- a/drivers/infiniband/hw/mlx4/srq.c

>+++ b/drivers/infiniband/hw/mlx4/srq.c

>@@ -34,6 +34,7 @@

> #include <linux/mlx4/qp.h>

> #include <linux/mlx4/srq.h>

> #include <linux/slab.h>

>+#include <linux/vmalloc.h>

> 

> #include "mlx4_ib.h"

> #include "user.h"

>@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,

> 

> 		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);

> 		if (!srq->wrid) {

>-			err = -ENOMEM;

>-			goto err_mtt;

>+			srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),

>+					      GFP_KERNEL, PAGE_KERNEL);

>+			if (!srq->wrid) {

>+				err = -ENOMEM;

>+				goto err_mtt;

>+			}

> 		}

> 	}

> 

>@@ -204,7 +209,7 @@ err_wrid:

> 	if (pd->uobject)

> 		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);

> 	else

>-		kfree(srq->wrid);

>+		kvfree(srq->wrid);

> 

> err_mtt:

> 	mlx4_mtt_cleanup(dev->dev, &srq->mtt);

>-- 

>2.1.0

>

>--

>To unsubscribe from this list: send the line "unsubscribe linux-rdma" in

>the body of a message to majordomo@vger.kernel.org

>More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wengang Wang Dec. 17, 2015, 2:39 a.m. UTC | #2
Hi Matt,

Yes, you are right.
Since the patch is already merged in, I am going to make a separated 
patch for that.

thanks,
wengang

? 2015?12?12? 04:28, Matthew Finlay ??:
> Hi Wengang,
>
> I was going through your patch set here, and it seems that you missed changing kfree to kvfree in mlx4_ib_destroy_srq().  In the current code if the srq wrid is allocated using vmalloc, then on cleanup we will use kfree, which is a bug.
>
> Thanks,
> -matt
>
>
>
>
> On 10/7/15, 10:27 PM, "linux-rdma-owner@vger.kernel.org on behalf of Wengang Wang" <linux-rdma-owner@vger.kernel.org on behalf of wen.gang.wang@oracle.com> wrote:
>
>> There are several hits that WR buffer allocation(kmalloc) failed.
>> It failed at order 3 and/or 4 contigous pages allocation. At the same time
>> there are actually 100MB+ free memory but well fragmented.
>> So try vmalloc when kmalloc failed.
>>
>> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
>> Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
>> ---
>> drivers/infiniband/hw/mlx4/qp.c  | 19 +++++++++++++------
>> drivers/infiniband/hw/mlx4/srq.c | 11 ++++++++---
>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
>> index 4ad9be3..3ccbd3a 100644
>> --- a/drivers/infiniband/hw/mlx4/qp.c
>> +++ b/drivers/infiniband/hw/mlx4/qp.c
>> @@ -34,6 +34,7 @@
>> #include <linux/log2.h>
>> #include <linux/slab.h>
>> #include <linux/netdevice.h>
>> +#include <linux/vmalloc.h>
>>
>> #include <rdma/ib_cache.h>
>> #include <rdma/ib_pack.h>
>> @@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>> 		if (err)
>> 			goto err_mtt;
>>
>> -		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
>> -		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
>> +		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
>> +		if (!qp->sq.wrid)
>> +			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
>> +						gfp, PAGE_KERNEL);
>> +		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
>> +		if (!qp->rq.wrid)
>> +			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
>> +						gfp, PAGE_KERNEL);
>> 		if (!qp->sq.wrid || !qp->rq.wrid) {
>> 			err = -ENOMEM;
>> 			goto err_wrid;
>> @@ -874,8 +881,8 @@ err_wrid:
>> 		if (qp_has_rq(init_attr))
>> 			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
>> 	} else {
>> -		kfree(qp->sq.wrid);
>> -		kfree(qp->rq.wrid);
>> +		kvfree(qp->sq.wrid);
>> +		kvfree(qp->rq.wrid);
>> 	}
>>
>> err_mtt:
>> @@ -1050,8 +1057,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
>> 					      &qp->db);
>> 		ib_umem_release(qp->umem);
>> 	} else {
>> -		kfree(qp->sq.wrid);
>> -		kfree(qp->rq.wrid);
>> +		kvfree(qp->sq.wrid);
>> +		kvfree(qp->rq.wrid);
>> 		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
>> 		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
>> 			free_proxy_bufs(&dev->ib_dev, qp);
>> diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
>> index dce5dfe..8d133c4 100644
>> --- a/drivers/infiniband/hw/mlx4/srq.c
>> +++ b/drivers/infiniband/hw/mlx4/srq.c
>> @@ -34,6 +34,7 @@
>> #include <linux/mlx4/qp.h>
>> #include <linux/mlx4/srq.h>
>> #include <linux/slab.h>
>> +#include <linux/vmalloc.h>
>>
>> #include "mlx4_ib.h"
>> #include "user.h"
>> @@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
>>
>> 		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
>> 		if (!srq->wrid) {
>> -			err = -ENOMEM;
>> -			goto err_mtt;
>> +			srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
>> +					      GFP_KERNEL, PAGE_KERNEL);
>> +			if (!srq->wrid) {
>> +				err = -ENOMEM;
>> +				goto err_mtt;
>> +			}
>> 		}
>> 	}
>>
>> @@ -204,7 +209,7 @@ err_wrid:
>> 	if (pd->uobject)
>> 		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
>> 	else
>> -		kfree(srq->wrid);
>> +		kvfree(srq->wrid);
>>
>> err_mtt:
>> 	mlx4_mtt_cleanup(dev->dev, &srq->mtt);
>> -- 
>> 2.1.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> N?????r??y???b?X???v?^?)?{.n?+????{????{ay????,j??f???h???z??w??????j:+v???w?j?m????????zZ+??????j"??!tml=

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3..3ccbd3a 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@ 
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
@@ -786,8 +787,14 @@  static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->sq.wrid)
+			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
+		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->rq.wrid)
+			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -874,8 +881,8 @@  err_wrid:
 		if (qp_has_rq(init_attr))
 			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
 	}
 
 err_mtt:
@@ -1050,8 +1057,8 @@  static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 					      &qp->db);
 		ib_umem_release(qp->umem);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
 		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
 		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
 			free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..8d133c4 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@ 
 #include <linux/mlx4/qp.h>
 #include <linux/mlx4/srq.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@  struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
 		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
 		if (!srq->wrid) {
-			err = -ENOMEM;
-			goto err_mtt;
+			srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+					      GFP_KERNEL, PAGE_KERNEL);
+			if (!srq->wrid) {
+				err = -ENOMEM;
+				goto err_mtt;
+			}
 		}
 	}
 
@@ -204,7 +209,7 @@  err_wrid:
 	if (pd->uobject)
 		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
 	else
-		kfree(srq->wrid);
+		kvfree(srq->wrid);
 
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &srq->mtt);