diff mbox series

[for-next,v3,1/2] IB/cma: Introduce rdma_set_min_rnr_timer()

Message ID 1617216194-12890-2-git-send-email-haakon.bugge@oracle.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series Introduce rdma_set_min_rnr_timer() and use it in RDS | expand

Commit Message

Haakon Bugge March 31, 2021, 6:43 p.m. UTC
Introduce the ability for kernel ULPs to adjust the minimum RNR Retry
timer. The INIT -> RTR transition executed by RDMA CM will be used for
this adjustment. This avoids an additional ib_modify_qp() call.

rdma_set_min_rnr_timer() must be called before the call to
rdma_connect() on the active side and before the call to rdma_accept()
on the passive side.

The default value of RNR Retry timer is zero, which translates to 655
ms. When the receiver is not ready to accept a send messages, it
encodes the RNR Retry timer value in the NAK. The requestor will then
wait at least the specified time value before retrying the send.

The 5-bit value to be supplied to the rdma_set_min_rnr_timer() is
documented in IBTA Table 45: "Encoding for RNR NAK Timer Field".

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cma.c      | 41 ++++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/cma_priv.h |  2 ++
 include/rdma/rdma_cm.h             |  2 ++
 3 files changed, 45 insertions(+)

Comments

Leon Romanovsky April 1, 2021, 10:54 a.m. UTC | #1
On Wed, Mar 31, 2021 at 08:43:13PM +0200, Håkon Bugge wrote:
> Introduce the ability for kernel ULPs to adjust the minimum RNR Retry
> timer. The INIT -> RTR transition executed by RDMA CM will be used for
> this adjustment. This avoids an additional ib_modify_qp() call.
> 
> rdma_set_min_rnr_timer() must be called before the call to
> rdma_connect() on the active side and before the call to rdma_accept()
> on the passive side.
> 
> The default value of RNR Retry timer is zero, which translates to 655
> ms. When the receiver is not ready to accept a send messages, it
> encodes the RNR Retry timer value in the NAK. The requestor will then
> wait at least the specified time value before retrying the send.
> 
> The 5-bit value to be supplied to the rdma_set_min_rnr_timer() is
> documented in IBTA Table 45: "Encoding for RNR NAK Timer Field".
> 
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> Acked-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/infiniband/core/cma.c      | 41 ++++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/core/cma_priv.h |  2 ++
>  include/rdma/rdma_cm.h             |  2 ++
>  3 files changed, 45 insertions(+)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 9409651..5ce097d 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -852,6 +852,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
>  	id_priv->id.qp_type = qp_type;
>  	id_priv->tos_set = false;
>  	id_priv->timeout_set = false;
> +	id_priv->min_rnr_timer_set = false;
>  	id_priv->gid_type = IB_GID_TYPE_IB;
>  	spin_lock_init(&id_priv->lock);
>  	mutex_init(&id_priv->qp_mutex);
> @@ -1141,6 +1142,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>  	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
>  		qp_attr->timeout = id_priv->timeout;
>  
> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
> +		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(rdma_init_qp_attr);
> @@ -2615,6 +2619,43 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
>  }
>  EXPORT_SYMBOL(rdma_set_ack_timeout);
>  
> +/**
> + * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
> + *			      QP associated with a connection identifier.
> + * @id: Communication identifier to associated with service type.
> + * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
> + *		   Timer Field" in the IBTA specification.
> + *
> + * This function should be called before rdma_connect() on active
> + * side, and on passive side before rdma_accept(). The timer value
> + * will be associated with the local QP. When it receives a send it is
> + * not read to handle, typically if the receive queue is empty, an RNR
> + * Retry NAK is returned to the requester with the min_rnr_timer
> + * encoded. The requester will then wait at least the time specified
> + * in the NAK before retrying. The default is zero, which translates
> + * to a minimum RNR Timer value of 655 ms.
> + *
> + * Return: 0 for success
> + */
> +int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
> +{
> +	struct rdma_id_private *id_priv;
> +
> +	/* It is a five-bit value */
> +	if (min_rnr_timer & 0xe0)
> +		return -EINVAL;
> +
> +	if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT)
> +		return -EINVAL;

This is in-kernel API and safe to use WARN_ON() instead of returning
error which RDS is not checking anyway.

Thanks
diff mbox series

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9409651..5ce097d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -852,6 +852,7 @@  static void cma_id_put(struct rdma_id_private *id_priv)
 	id_priv->id.qp_type = qp_type;
 	id_priv->tos_set = false;
 	id_priv->timeout_set = false;
+	id_priv->min_rnr_timer_set = false;
 	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
@@ -1141,6 +1142,9 @@  int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
 		qp_attr->timeout = id_priv->timeout;
 
+	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
+
 	return ret;
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -2615,6 +2619,43 @@  int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
 }
 EXPORT_SYMBOL(rdma_set_ack_timeout);
 
+/**
+ * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
+ *			      QP associated with a connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
+ *		   Timer Field" in the IBTA specification.
+ *
+ * This function should be called before rdma_connect() on active
+ * side, and on passive side before rdma_accept(). The timer value
+ * will be associated with the local QP. When it receives a send it is
+ * not read to handle, typically if the receive queue is empty, an RNR
+ * Retry NAK is returned to the requester with the min_rnr_timer
+ * encoded. The requester will then wait at least the time specified
+ * in the NAK before retrying. The default is zero, which translates
+ * to a minimum RNR Timer value of 655 ms.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+	struct rdma_id_private *id_priv;
+
+	/* It is a five-bit value */
+	if (min_rnr_timer & 0xe0)
+		return -EINVAL;
+
+	if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT)
+		return -EINVAL;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	id_priv->min_rnr_timer = min_rnr_timer;
+	id_priv->min_rnr_timer_set = true;
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
+
 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
 			      void *context)
 {
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index caece96..bf83d32 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -86,9 +86,11 @@  struct rdma_id_private {
 	u8			tos;
 	u8			tos_set:1;
 	u8                      timeout_set:1;
+	u8			min_rnr_timer_set:1;
 	u8			reuseaddr;
 	u8			afonly;
 	u8			timeout;
+	u8			min_rnr_timer;
 	enum ib_gid_type	gid_type;
 
 	/*
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 32a67af..8b0f66e 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -331,6 +331,8 @@  int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
+
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer);
  /**
  * rdma_get_service_id - Return the IB service ID for a specified address.
  * @id: Communication identifier associated with the address.