diff mbox

[rdma-next,1/1] IB/mlx5: Check for at least 1 medium bfreg

Message ID 1524768022-14878-1-git-send-email-rzambre@uci.edu (mailing list archive)
State Superseded
Headers show

Commit Message

Rohit Zambre April 26, 2018, 6:40 p.m. UTC
The problem occurs when the num_low_latency_bfregs equals
total_num_bfregs-1. In this case, the current bfreg-to-QP mapping policy
maps the first QP of the context to bfreg_1. However, the userspace mlx5
driver designates bfreg_1 as a medium bfreg; need_lock for bf_1 evaluates
to 1. Hence, the kernel is allocating a medium bfreg to the first QP of
the context, which is incorrect behavior.

More importantly, for any combination of total_num_bfregs and
num_low_latency_bfregs, there must be at least 1 medium bfreg to
accommodate the QPs created after the first #num_low_latency_bfregs
QPs in the context. For example, if the user requests for 4 total bfregs
and 3 low latency bfregs, the driver cannot map any QP after the 3rd one
without breaking the definition of a low latency bfreg: one that is mapped
to only 1 QP and hence doesn't need a lock. Currently, the driver allows
for such a user-request to pass.

The current check only accounts for the 0th bfreg. This fix implements the
check for the constraint on the minimum number of medium bfregs required.

Signed-off-by: Rohit Zambre <rzambre@uci.edu>
---
 drivers/infiniband/hw/mlx5/main.c | 5 +++--
 drivers/infiniband/hw/mlx5/qp.c   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

Comments

Yishai Hadas April 29, 2018, 8:24 a.m. UTC | #1
On 4/26/2018 9:40 PM, Rohit Zambre wrote:
> The problem occurs when the num_low_latency_bfregs equals
> total_num_bfregs-1. In this case, the current bfreg-to-QP mapping policy
> maps the first QP of the context to bfreg_1. However, the userspace mlx5
> driver designates bfreg_1 as a medium bfreg;

There was no design to have at least 1 medium bfreg. If there is 
some limitation/issue in the userspace mlx5 code, it should be handled 
there in a way that will drop this limitation.

  need_lock for bf_1 evaluates
> to 1. Hence, the kernel is allocatng a medium bfreg to the first QP of
> the context, which is incorrect behavior.
> 
> More importantly, for any combination of total_num_bfregs and
> num_low_latency_bfregs, there must be at least 1 medium bfreg to
> accommodate for the QPs created after the first #num_low_latency_bfregs
> QPs in the context. For example, if the user requests for 4 total bfregs
> and 3 low latency bfregs, the driver cannot map any QP after the 3rd one
> without breaking the definition of a low latency bfreg: one that is mapped
> to only 1 QP and hence doesn't need a lock. Currently, the driver allows
> for such a user-request to pass.
> 
> The current check only accounts for the 0th bfreg. This fix implements the
> check for the constraint on the minimum number of medium bfregs required.
> 
> Signed-off-by: Rohit Zambre <rzambre@uci.edu>
> ---
>   drivers/infiniband/hw/mlx5/main.c | 5 +++--
>   drivers/infiniband/hw/mlx5/qp.c   | 2 +-
>   2 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index daa919e..75c4b26 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -1525,7 +1525,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
>   	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
>   	/* This holds the required static allocation asked by the user */
>   	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
> -	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
> +	if (req->num_low_latency_bfregs > req->total_num_bfregs - 2)
>   		return -EINVAL;
>   

As per the above, this is not the expected change; it needs to be handled in the 
userspace area so that there is no such limitation.

>   	bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
> @@ -1669,7 +1669,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
>   
>   	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
>   				    MLX5_NON_FP_BFREGS_PER_UAR);
> -	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
> +	/* account for the 0th bfreg and at least one medium bfreg */
> +	if (req.num_low_latency_bfregs > req.total_num_bfregs - 2)
>   		return ERR_PTR(-EINVAL);
>   
>   	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
> diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
> index 7ed4b70..ad09174 100644
> --- a/drivers/infiniband/hw/mlx5/qp.c
> +++ b/drivers/infiniband/hw/mlx5/qp.c
> @@ -507,7 +507,7 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev,
>   	n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
>   	    NUM_NON_BLUE_FLAME_BFREGS;
>   
> -	return n >= 0 ? n : 0;
> +	return n > 0 ? n : 1;
>   }
>   
>   static int first_hi_bfreg(struct mlx5_ib_dev *dev,
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index daa919e..75c4b26 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1525,7 +1525,7 @@  static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
 	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
 	/* This holds the required static allocation asked by the user */
 	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
-	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+	if (req->num_low_latency_bfregs > req->total_num_bfregs - 2)
 		return -EINVAL;
 
 	bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
@@ -1669,7 +1669,8 @@  static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
 				    MLX5_NON_FP_BFREGS_PER_UAR);
-	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
+	/* account for the 0th bfreg and at least one medium bfreg */
+	if (req.num_low_latency_bfregs > req.total_num_bfregs - 2)
 		return ERR_PTR(-EINVAL);
 
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7ed4b70..ad09174 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -507,7 +507,7 @@  static int num_med_bfreg(struct mlx5_ib_dev *dev,
 	n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
 	    NUM_NON_BLUE_FLAME_BFREGS;
 
-	return n >= 0 ? n : 0;
+	return n > 0 ? n : 1;
 }
 
 static int first_hi_bfreg(struct mlx5_ib_dev *dev,