diff mbox

[12/12] IB/srp: Make CM timeout dependent on subnet timeout

Message ID 5541EFB3.6030704@sandisk.com (mailing list archive)
State Rejected
Headers show

Commit Message

Bart Van Assche April 30, 2015, 9:02 a.m. UTC
In large networks the subnet timeout may have been set to a value
above 20. In small networks it can be safe to reduce the subnet
timeout below 20. The CM timeout should be proportional to the
subnet timeout. Hence make the CM timeout dependent on the subnet
timeout. Since the default subnet timeout used by OpenSM is 18
this patch does not change the CM timeout if the default OpenSM
subnet timeout is used.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

Comments

Sagi Grimberg April 30, 2015, 10:27 a.m. UTC | #1
On 4/30/2015 12:02 PM, Bart Van Assche wrote:
> In large networks the subnet timeout may have been set to a value
> above 20. In small networks it can be safe to reduce the subnet
> timeout below 20. The CM timeout should be proportional to the
> subnet timeout. Hence make the CM timeout dependent on the subnet
> timeout. Since the default subnet timeout used by OpenSM is 18
> this patch does not change the CM timeout if the default OpenSM
> subnet timeout is used.
>
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Sagi Grimberg <sagig@mellanox.com>
> Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
> ---
>   drivers/infiniband/ulp/srp/ib_srp.c | 20 ++++++++++++++++++--
>   1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
> index 27d3a64..a18a2ae 100644
> --- a/drivers/infiniband/ulp/srp/ib_srp.c
> +++ b/drivers/infiniband/ulp/srp/ib_srp.c
> @@ -704,6 +704,19 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
>   	return ch->status;
>   }
>
> +static u8 srp_get_subnet_timeout(struct srp_host *host)
> +{
> +	struct ib_port_attr attr;
> +	int ret;
> +	u8 subnet_timeout = 18;
> +
> +	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
> +	if (ret == 0)
> +		subnet_timeout = attr.subnet_timeout;
> +
> +	return subnet_timeout;
> +}
> +
>   static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
>   {
>   	struct srp_target_port *target = ch->target;
> @@ -711,8 +724,11 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
>   		struct ib_cm_req_param param;
>   		struct srp_login_req   priv;
>   	} *req = NULL;
> +	u8 subnet_timeout;
>   	int status;
>
> +	subnet_timeout = srp_get_subnet_timeout(target->srp_host);

Is this really something that srp initiator should look at?
I'd say that this needs to be a ib_cm code in case the input timeout
values are not set (e.g. zero).
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bart Van Assche April 30, 2015, 10:45 a.m. UTC | #2
On 04/30/15 12:27, Sagi Grimberg wrote:
> On 4/30/2015 12:02 PM, Bart Van Assche wrote:
>> In large networks the subnet timeout may have been set to a value
>> above 20. In small networks it can be safe to reduce the subnet
>> timeout below 20. The CM timeout should be proportional to the
>> subnet timeout. Hence make the CM timeout dependent on the subnet
>> timeout. Since the default subnet timeout used by OpenSM is 18
>> this patch does not change the CM timeout if the default OpenSM
>> subnet timeout is used.
>>
>> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
>> Cc: Sagi Grimberg <sagig@mellanox.com>
>> Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
>> ---
>>    drivers/infiniband/ulp/srp/ib_srp.c | 20 ++++++++++++++++++--
>>    1 file changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
>> index 27d3a64..a18a2ae 100644
>> --- a/drivers/infiniband/ulp/srp/ib_srp.c
>> +++ b/drivers/infiniband/ulp/srp/ib_srp.c
>> @@ -704,6 +704,19 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
>>    	return ch->status;
>>    }
>>
>> +static u8 srp_get_subnet_timeout(struct srp_host *host)
>> +{
>> +	struct ib_port_attr attr;
>> +	int ret;
>> +	u8 subnet_timeout = 18;
>> +
>> +	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
>> +	if (ret == 0)
>> +		subnet_timeout = attr.subnet_timeout;
>> +
>> +	return subnet_timeout;
>> +}
>> +
>>    static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
>>    {
>>    	struct srp_target_port *target = ch->target;
>> @@ -711,8 +724,11 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
>>    		struct ib_cm_req_param param;
>>    		struct srp_login_req   priv;
>>    	} *req = NULL;
>> +	u8 subnet_timeout;
>>    	int status;
>>
>> +	subnet_timeout = srp_get_subnet_timeout(target->srp_host);
>
> Is this really something that srp initiator should look at?
> I'd say that this needs to be a ib_cm code in case the input timeout
> values are not set (e.g. zero).

I'm open to suggestions for how to move this functionality into the IB 
CM. Today the IB CM interprets all timeout values as absolute timeouts 
instead of timeouts that are relative to the subnet timeout. If we want 
to move this functionality into the IB CM we need an approach that 
allows the IB CM to discern relative from absolute values. I think this 
is only possible by changing struct ib_cm_req_param. Sean, would you 
perhaps like to share your opinion about this ?

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 27d3a64..a18a2ae 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -704,6 +704,19 @@  static int srp_lookup_path(struct srp_rdma_ch *ch)
 	return ch->status;
 }
 
+static u8 srp_get_subnet_timeout(struct srp_host *host)
+{
+	struct ib_port_attr attr;
+	int ret;
+	u8 subnet_timeout = 18;
+
+	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
+	if (ret == 0)
+		subnet_timeout = attr.subnet_timeout;
+
+	return subnet_timeout;
+}
+
 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
 {
 	struct srp_target_port *target = ch->target;
@@ -711,8 +724,11 @@  static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
 		struct ib_cm_req_param param;
 		struct srp_login_req   priv;
 	} *req = NULL;
+	u8 subnet_timeout;
 	int status;
 
+	subnet_timeout = srp_get_subnet_timeout(target->srp_host);
+
 	req = kzalloc(sizeof *req, GFP_KERNEL);
 	if (!req)
 		return -ENOMEM;
@@ -734,8 +750,8 @@  static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
 	 * module parameters if anyone cared about setting them.
 	 */
 	req->param.responder_resources	      = 4;
-	req->param.remote_cm_response_timeout = 20;
-	req->param.local_cm_response_timeout  = 20;
+	req->param.remote_cm_response_timeout = subnet_timeout + 2;
+	req->param.local_cm_response_timeout  = subnet_timeout + 2;
 	req->param.retry_count                = target->tl_retry_count;
 	req->param.rnr_retry_count 	      = 7;
 	req->param.max_cm_retries 	      = 15;