diff mbox

IB: fix task hanging on error recovery

Message ID 1350683896-3928-1-git-send-email-klebers@linux.vnet.ibm.com (mailing list archive)
State Rejected
Headers show

Commit Message

Kleber Sacilotto de Souza Oct. 19, 2012, 9:58 p.m. UTC
During PCI error recovery, the calls to wait_for_completion() in the
infiniband core path can hang waiting for some tasks that will never
complete, since the hardware is nonfunctional.

INFO: task eehd:16029 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
message.
eehd            D 0000000000000000  9664 16029   3093 0x00000080
Call Trace:
[c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
[c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
[c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
[c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
[c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
[c0000000af8437c0] [d000000004113538] .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
[c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180 [ib_sa]
[c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170 [ib_core]
[c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0 [mlx4_ib]
[c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110 [mlx4_core]
[c0000000af843b40] [d000000003f6abfc] .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
[c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0 [mlx4_core]
[c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40 [mlx4_core]
[c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
[c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
[c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
[c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
[c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70

This patch fixes the issue by replacing the calls to
wait_for_completion() by wait_for_completion_timeout(), providing a
timeout of 5 seconds to wait for the normal completion of the task.

Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
---
 drivers/infiniband/core/addr.c        |    2 +-
 drivers/infiniband/core/cm.c          |    2 +-
 drivers/infiniband/core/cma.c         |    4 ++--
 drivers/infiniband/core/iwcm.c        |    2 +-
 drivers/infiniband/core/mad.c         |    4 ++--
 drivers/infiniband/core/mad_rmpp.c    |    2 +-
 drivers/infiniband/core/multicast.c   |    4 ++--
 drivers/infiniband/core/sa_query.c    |    2 +-
 drivers/infiniband/core/ucm.c         |    2 +-
 drivers/infiniband/core/ucma.c        |    2 +-
 drivers/infiniband/core/uverbs_main.c |    4 ++--
 11 files changed, 15 insertions(+), 15 deletions(-)

Comments

Kleber Sacilotto de Souza Oct. 30, 2012, 1:04 p.m. UTC | #1
Hi,

Any comments about this one?


Thank you,
Kleber


On 10/19/2012 06:58 PM, Kleber Sacilotto de Souza wrote:
> During PCI error recovery, the calls to wait_for_completion() in the
> infiniband core path can hang waiting for some tasks that will never
> complete, since the hardware is nonfunctional.
>
> INFO: task eehd:16029 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
> message.
> eehd            D 0000000000000000  9664 16029   3093 0x00000080
> Call Trace:
> [c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
> [c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
> [c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
> [c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
> [c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
> [c0000000af8437c0] [d000000004113538] .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
> [c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180 [ib_sa]
> [c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170 [ib_core]
> [c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0 [mlx4_ib]
> [c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110 [mlx4_core]
> [c0000000af843b40] [d000000003f6abfc] .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
> [c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0 [mlx4_core]
> [c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40 [mlx4_core]
> [c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
> [c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
> [c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
> [c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
> [c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70
>
> This patch fixes the issue by replacing the calls to
> wait_for_completion() by wait_for_completion_timeout(), providing a
> timeout of 5 seconds to wait for the normal completion of the task.
>
> Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
> ---
>  drivers/infiniband/core/addr.c        |    2 +-
>  drivers/infiniband/core/cm.c          |    2 +-
>  drivers/infiniband/core/cma.c         |    4 ++--
>  drivers/infiniband/core/iwcm.c        |    2 +-
>  drivers/infiniband/core/mad.c         |    4 ++--
>  drivers/infiniband/core/mad_rmpp.c    |    2 +-
>  drivers/infiniband/core/multicast.c   |    4 ++--
>  drivers/infiniband/core/sa_query.c    |    2 +-
>  drivers/infiniband/core/ucm.c         |    2 +-
>  drivers/infiniband/core/ucma.c        |    2 +-
>  drivers/infiniband/core/uverbs_main.c |    4 ++--
>  11 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
> index eaec8d7..be42113 100644
> --- a/drivers/infiniband/core/addr.c
> +++ b/drivers/infiniband/core/addr.c
> @@ -86,7 +86,7 @@ static inline void put_client(struct rdma_addr_client *client)
>  void rdma_addr_unregister_client(struct rdma_addr_client *client)
>  {
>  	put_client(client);
> -	wait_for_completion(&client->comp);
> +	wait_for_completion_timeout(&client->comp, 5*HZ);
>  }
>  EXPORT_SYMBOL(rdma_addr_unregister_client);
>
> diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
> index 394fea2..062a90b 100644
> --- a/drivers/infiniband/core/cm.c
> +++ b/drivers/infiniband/core/cm.c
> @@ -910,7 +910,7 @@ retest:
>
>  	cm_free_id(cm_id->local_id);
>  	cm_deref_id(cm_id_priv);
> -	wait_for_completion(&cm_id_priv->comp);
> +	wait_for_completion_timeout(&cm_id_priv->comp, 5*HZ);
>  	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
>  		cm_free_work(work);
>  	kfree(cm_id_priv->compare_data);
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index a7568c3..c85d078 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -944,7 +944,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
>
>  	cma_release_port(id_priv);
>  	cma_deref_id(id_priv);
> -	wait_for_completion(&id_priv->comp);
> +	wait_for_completion_timeout(&id_priv->comp, 5*HZ);
>
>  	if (id_priv->internal_id)
>  		cma_deref_id(id_priv->id.context);
> @@ -3388,7 +3388,7 @@ static void cma_process_remove(struct cma_device *cma_dev)
>  	mutex_unlock(&lock);
>
>  	cma_deref_dev(cma_dev);
> -	wait_for_completion(&cma_dev->comp);
> +	wait_for_completion_timeout(&cma_dev->comp, 5*HZ);
>  }
>
>  static void cma_remove_one(struct ib_device *device)
> diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
> index 0bb99bb..7457195 100644
> --- a/drivers/infiniband/core/iwcm.c
> +++ b/drivers/infiniband/core/iwcm.c
> @@ -399,7 +399,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
>
>  	destroy_cm_id(cm_id);
>
> -	wait_for_completion(&cm_id_priv->destroy_comp);
> +	wait_for_completion_timeout(&cm_id_priv->destroy_comp, 5*HZ);
>
>  	free_cm_id(cm_id_priv);
>  }
> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
> index dc3fd1e..bf94a67 100644
> --- a/drivers/infiniband/core/mad.c
> +++ b/drivers/infiniband/core/mad.c
> @@ -539,7 +539,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
>  	ib_cancel_rmpp_recvs(mad_agent_priv);
>
>  	deref_mad_agent(mad_agent_priv);
> -	wait_for_completion(&mad_agent_priv->comp);
> +	wait_for_completion_timeout(&mad_agent_priv->comp, 5*HZ);
>
>  	kfree(mad_agent_priv->reg_req);
>  	ib_dereg_mr(mad_agent_priv->agent.mr);
> @@ -558,7 +558,7 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
>  	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
>
>  	deref_snoop_agent(mad_snoop_priv);
> -	wait_for_completion(&mad_snoop_priv->comp);
> +	wait_for_completion_timeout(&mad_snoop_priv->comp, 5*HZ);
>
>  	kfree(mad_snoop_priv);
>  }
> diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
> index f37878c..562bf84 100644
> --- a/drivers/infiniband/core/mad_rmpp.c
> +++ b/drivers/infiniband/core/mad_rmpp.c
> @@ -78,7 +78,7 @@ static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
>  static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
>  {
>  	deref_rmpp_recv(rmpp_recv);
> -	wait_for_completion(&rmpp_recv->comp);
> +	wait_for_completion_timeout(&rmpp_recv->comp, 5*HZ);
>  	ib_destroy_ah(rmpp_recv->ah);
>  	kfree(rmpp_recv);
>  }
> diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
> index d2360a8..0cba0c6 100644
> --- a/drivers/infiniband/core/multicast.c
> +++ b/drivers/infiniband/core/multicast.c
> @@ -684,7 +684,7 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
>  	}
>
>  	deref_member(member);
> -	wait_for_completion(&member->comp);
> +	wait_for_completion_timeout(&member->comp, 5*HZ);
>  	ib_sa_client_put(member->client);
>  	kfree(member);
>  }
> @@ -862,7 +862,7 @@ static void mcast_remove_one(struct ib_device *device)
>  		    IB_LINK_LAYER_INFINIBAND) {
>  			port = &dev->port[i];
>  			deref_port(port);
> -			wait_for_completion(&port->comp);
> +			wait_for_completion_timeout(&port->comp, 5*HZ);
>  		}
>  	}
>
> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
> index a8905ab..25547fe 100644
> --- a/drivers/infiniband/core/sa_query.c
> +++ b/drivers/infiniband/core/sa_query.c
> @@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_sa_register_client);
>  void ib_sa_unregister_client(struct ib_sa_client *client)
>  {
>  	ib_sa_client_put(client);
> -	wait_for_completion(&client->comp);
> +	wait_for_completion_timeout(&client->comp, 5*HZ);
>  }
>  EXPORT_SYMBOL(ib_sa_unregister_client);
>
> diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
> index 49b15ac..6358941 100644
> --- a/drivers/infiniband/core/ucm.c
> +++ b/drivers/infiniband/core/ucm.c
> @@ -550,7 +550,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
>  		return PTR_ERR(ctx);
>
>  	ib_ucm_ctx_put(ctx);
> -	wait_for_completion(&ctx->comp);
> +	wait_for_completion_timeout(&ctx->comp, 5*HZ);
>
>  	/* No new events will be generated after destroying the cm_id. */
>  	ib_destroy_cm_id(ctx->cm_id);
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index 2709ff5..4b4eb29 100644
> --- a/drivers/infiniband/core/ucma.c
> +++ b/drivers/infiniband/core/ucma.c
> @@ -516,7 +516,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
>  		return PTR_ERR(ctx);
>
>  	ucma_put_ctx(ctx);
> -	wait_for_completion(&ctx->comp);
> +	wait_for_completion_timeout(&ctx->comp, 5*HZ);
>  	resp.events_reported = ucma_free_ctx(ctx);
>
>  	if (copy_to_user((void __user *)(unsigned long)cmd.response,
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 6f2ce6f..0824b92 100644
> --- a/drivers/infiniband/core/uverbs_main.c
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -819,7 +819,7 @@ err_cdev:
>
>  err:
>  	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
> -	wait_for_completion(&uverbs_dev->comp);
> +	wait_for_completion_timeout(&uverbs_dev->comp, 5*HZ);
>  	kfree(uverbs_dev);
>  	return;
>  }
> @@ -841,7 +841,7 @@ static void ib_uverbs_remove_one(struct ib_device *device)
>  		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
>
>  	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
> -	wait_for_completion(&uverbs_dev->comp);
> +	wait_for_completion_timeout(&uverbs_dev->comp, 5*HZ);
>  	kfree(uverbs_dev);
>  }
>
Or Gerlitz Nov. 1, 2012, 3:12 p.m. UTC | #2
On 19/10/2012 23:58, Kleber Sacilotto de Souza wrote:
> During PCI error recovery, the calls to wait_for_completion() in the
> infiniband core path can hang waiting for some tasks that will never
> complete, since the hardware is nonfunctional.
>
> INFO: task eehd:16029 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
> message.
> eehd            D 0000000000000000  9664 16029   3093 0x00000080
> Call Trace:
> [c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
> [c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
> [c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
> [c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
> [c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
> [c0000000af8437c0] [d000000004113538] .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
> [c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180 [ib_sa]
> [c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170 [ib_core]
> [c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0 [mlx4_ib]
> [c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110 [mlx4_core]
> [c0000000af843b40] [d000000003f6abfc] .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
> [c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0 [mlx4_core]
> [c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40 [mlx4_core]
> [c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
> [c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
> [c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
> [c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
> [c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70
>
> This patch fixes the issue by replacing the calls to
> wait_for_completion() by wait_for_completion_timeout(), providing a
> timeout of 5 seconds to wait for the normal completion of the task.
>
> Signed-off-by: Kleber Sacilotto de Souza<klebers@linux.vnet.ibm.com>
> ---
>   drivers/infiniband/core/addr.c        |    2 +-
>   drivers/infiniband/core/cm.c          |    2 +-
>   drivers/infiniband/core/cma.c         |    4 ++--
>   drivers/infiniband/core/iwcm.c        |    2 +-
>   drivers/infiniband/core/mad.c         |    4 ++--
>   drivers/infiniband/core/mad_rmpp.c    |    2 +-
>   drivers/infiniband/core/multicast.c   |    4 ++--
>   drivers/infiniband/core/sa_query.c    |    2 +-

for all the above files, your patch makes sense, since the 
completion/refcount logic
relate to waiting for event that needs to come from the device.

>   drivers/infiniband/core/ucm.c         |    2 +-
>   drivers/infiniband/core/ucma.c        |    2 +-

On these files, as far as I understand this code from quick looking, I'm 
not sure on what
exactly the completion objects protects, Sean?

Or.

>   drivers/infiniband/core/uverbs_main.c |    4 ++--


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hal Rosenstock Nov. 1, 2012, 3:30 p.m. UTC | #3
On 11/1/2012 11:12 AM, Or Gerlitz wrote:
> On 19/10/2012 23:58, Kleber Sacilotto de Souza wrote:
>> During PCI error recovery, the calls to wait_for_completion() in the
>> infiniband core path can hang waiting for some tasks that will never
>> complete, since the hardware is nonfunctional.
>>
>> INFO: task eehd:16029 blocked for more than 120 seconds.
>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
>> message.
>> eehd            D 0000000000000000  9664 16029   3093 0x00000080
>> Call Trace:
>> [c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
>> [c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
>> [c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
>> [c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
>> [c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
>> [c0000000af8437c0] [d000000004113538]
>> .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
>> [c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180
>> [ib_sa]
>> [c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170
>> [ib_core]
>> [c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0
>> [mlx4_ib]
>> [c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110
>> [mlx4_core]
>> [c0000000af843b40] [d000000003f6abfc]
>> .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
>> [c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0
>> [mlx4_core]
>> [c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40
>> [mlx4_core]
>> [c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
>> [c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
>> [c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
>> [c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
>> [c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70
>>
>> This patch fixes the issue by replacing the calls to
>> wait_for_completion() by wait_for_completion_timeout(), providing a
>> timeout of 5 seconds to wait for the normal completion of the task.
>>
>> Signed-off-by: Kleber Sacilotto de Souza<klebers@linux.vnet.ibm.com>
>> ---
>>   drivers/infiniband/core/addr.c        |    2 +-
>>   drivers/infiniband/core/cm.c          |    2 +-
>>   drivers/infiniband/core/cma.c         |    4 ++--
>>   drivers/infiniband/core/iwcm.c        |    2 +-
>>   drivers/infiniband/core/mad.c         |    4 ++--
>>   drivers/infiniband/core/mad_rmpp.c    |    2 +-
>>   drivers/infiniband/core/multicast.c   |    4 ++--
>>   drivers/infiniband/core/sa_query.c    |    2 +-
> 
> for all the above files, your patch makes sense, since the
> completion/refcount logic
> relate to waiting for event that needs to come from the device.

Is this a fix or a workaround for some other underlying issue which
needs fixing ?

What happens when modules are reloaded after this ? Do things still work
? Are resources lost ?

-- Hal

> 
>>   drivers/infiniband/core/ucm.c         |    2 +-
>>   drivers/infiniband/core/ucma.c        |    2 +-
> 
> On these files, as far as I understand this code from quick looking, I'm
> not sure on what
> exactly the completion objects protects, Sean?
> 
> Or.
> 
>>   drivers/infiniband/core/uverbs_main.c |    4 ++--
> 
> 
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kleber Sacilotto de Souza Nov. 1, 2012, 5:53 p.m. UTC | #4
On 11/01/2012 01:30 PM, Hal Rosenstock wrote:
> On 11/1/2012 11:12 AM, Or Gerlitz wrote:
>> On 19/10/2012 23:58, Kleber Sacilotto de Souza wrote:
>>> During PCI error recovery, the calls to wait_for_completion() in the
>>> infiniband core path can hang waiting for some tasks that will never
>>> complete, since the hardware is nonfunctional.
>>>
>>> INFO: task eehd:16029 blocked for more than 120 seconds.
>>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
>>> message.
>>> eehd            D 0000000000000000  9664 16029   3093 0x00000080
>>> Call Trace:
>>> [c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
>>> [c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
>>> [c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
>>> [c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
>>> [c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
>>> [c0000000af8437c0] [d000000004113538]
>>> .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
>>> [c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180
>>> [ib_sa]
>>> [c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170
>>> [ib_core]
>>> [c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0
>>> [mlx4_ib]
>>> [c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110
>>> [mlx4_core]
>>> [c0000000af843b40] [d000000003f6abfc]
>>> .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
>>> [c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0
>>> [mlx4_core]
>>> [c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40
>>> [mlx4_core]
>>> [c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
>>> [c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
>>> [c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
>>> [c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
>>> [c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70
>>>
>>> This patch fixes the issue by replacing the calls to
>>> wait_for_completion() by wait_for_completion_timeout(), providing a
>>> timeout of 5 seconds to wait for the normal completion of the task.
>>>
>>> Signed-off-by: Kleber Sacilotto de Souza<klebers@linux.vnet.ibm.com>
>>> ---
>>>   drivers/infiniband/core/addr.c        |    2 +-
>>>   drivers/infiniband/core/cm.c          |    2 +-
>>>   drivers/infiniband/core/cma.c         |    4 ++--
>>>   drivers/infiniband/core/iwcm.c        |    2 +-
>>>   drivers/infiniband/core/mad.c         |    4 ++--
>>>   drivers/infiniband/core/mad_rmpp.c    |    2 +-
>>>   drivers/infiniband/core/multicast.c   |    4 ++--
>>>   drivers/infiniband/core/sa_query.c    |    2 +-
>> for all the above files, your patch makes sense, since the
>> completion/refcount logic
>> relate to waiting for event that needs to come from the device.
> Is this a fix or a workaround for some other underlying issue which
> needs fixing ?
>
> What happens when modules are reloaded after this ? Do things still work
> ? Are resources lost ?

After the error is recovered, the IB devices resume their normal operations,
and the modules can be reloaded successfully.

>
> -- Hal
>
>>>   drivers/infiniband/core/ucm.c         |    2 +-
>>>   drivers/infiniband/core/ucma.c        |    2 +-
>> On these files, as far as I understand this code from quick looking, I'm
>> not sure on what
>> exactly the completion objects protects, Sean?
>>
>> Or.
>>
>>>   drivers/infiniband/core/uverbs_main.c |    4 ++--
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
Hal Rosenstock Nov. 2, 2012, 1:24 p.m. UTC | #5
On 11/1/2012 1:53 PM, Kleber Sacilotto de Souza wrote:
> On 11/01/2012 01:30 PM, Hal Rosenstock wrote:
>> On 11/1/2012 11:12 AM, Or Gerlitz wrote:
>>> On 19/10/2012 23:58, Kleber Sacilotto de Souza wrote:
>>>> During PCI error recovery, the calls to wait_for_completion() in the
>>>> infiniband core path can hang waiting for some tasks that will never
>>>> complete, since the hardware is nonfunctional.
>>>>
>>>> INFO: task eehd:16029 blocked for more than 120 seconds.
>>>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
>>>> message.
>>>> eehd            D 0000000000000000  9664 16029   3093 0x00000080
>>>> Call Trace:
>>>> [c0000000af8430e0] [c0000000be05e200] 0xc0000000be05e200 (unreliable)
>>>> [c0000000af8432b0] [c000000000014070] .__switch_to+0x130/0x250
>>>> [c0000000af843360] [c000000000777c5c] .__schedule+0x40c/0x920
>>>> [c0000000af843600] [c000000000775a10] .schedule_timeout+0x210/0x260
>>>> [c0000000af8436e0] [c000000000777658] .wait_for_common+0xf8/0x210
>>>> [c0000000af8437c0] [d000000004113538]
>>>> .ib_unregister_mad_agent+0x498/0x690 [ib_mad]
>>>> [c0000000af8438c0] [d0000000042c01f4] .ib_sa_remove_one+0xe4/0x180
>>>> [ib_sa]
>>>> [c0000000af843970] [d0000000040952e8] .ib_unregister_device+0x78/0x170
>>>> [ib_core]
>>>> [c0000000af843a10] [d000000004183290] .mlx4_ib_remove+0x40/0x1f0
>>>> [mlx4_ib]
>>>> [c0000000af843ab0] [d000000003f6ab54] .mlx4_remove_device+0xd4/0x110
>>>> [mlx4_core]
>>>> [c0000000af843b40] [d000000003f6abfc]
>>>> .mlx4_unregister_device+0x6c/0xf0 [mlx4_core]
>>>> [c0000000af843be0] [d000000003f6f19c] .mlx4_remove_one+0x10c/0x3a0
>>>> [mlx4_core]
>>>> [c0000000af843c80] [d000000003f6f448] .mlx4_pci_err_detected+0x18/0x40
>>>> [mlx4_core]
>>>> [c0000000af843d00] [c000000000058a50] .eeh_report_error+0x70/0xe0
>>>> [c0000000af843d90] [c0000000003e4d94] .pci_walk_bus+0xa4/0x140
>>>> [c0000000af843e50] [c000000000058628] .handle_eeh_events+0x1f8/0x480
>>>> [c0000000af843f00] [c000000000058dfc] .eeh_event_handler+0x13c/0x1e0
>>>> [c0000000af843f90] [c00000000002031c] .kernel_thread+0x54/0x70
>>>>
>>>> This patch fixes the issue by replacing the calls to
>>>> wait_for_completion() by wait_for_completion_timeout(), providing a
>>>> timeout of 5 seconds to wait for the normal completion of the task.
>>>>
>>>> Signed-off-by: Kleber Sacilotto de Souza<klebers@linux.vnet.ibm.com>
>>>> ---
>>>>   drivers/infiniband/core/addr.c        |    2 +-
>>>>   drivers/infiniband/core/cm.c          |    2 +-
>>>>   drivers/infiniband/core/cma.c         |    4 ++--
>>>>   drivers/infiniband/core/iwcm.c        |    2 +-
>>>>   drivers/infiniband/core/mad.c         |    4 ++--
>>>>   drivers/infiniband/core/mad_rmpp.c    |    2 +-
>>>>   drivers/infiniband/core/multicast.c   |    4 ++--
>>>>   drivers/infiniband/core/sa_query.c    |    2 +-
>>> for all the above files, your patch makes sense, since the
>>> completion/refcount logic
>>> relate to waiting for event that needs to come from the device.
>> Is this a fix or a workaround for some other underlying issue which
>> needs fixing ?
>>
>> What happens when modules are reloaded after this ? Do things still work
>> ? Are resources lost ?
> 
> After the error is recovered, the IB devices resume their normal operations,
> and the modules can be reloaded successfully.

Isn't this paving over some lower layer issue where the required
completions aren't issued ? Shouldn't that be found and fixed ?

-- Hal

> 
>>
>> -- Hal
>>
>>>>   drivers/infiniband/core/ucm.c         |    2 +-
>>>>   drivers/infiniband/core/ucma.c        |    2 +-
>>> On these files, as far as I understand this code from quick looking, I'm
>>> not sure on what
>>> exactly the completion objects protects, Sean?
>>>
>>> Or.
>>>
>>>>   drivers/infiniband/core/uverbs_main.c |    4 ++--
>>>
>>> -- 
>>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier Nov. 5, 2012, 5:51 p.m. UTC | #6
On Fri, Oct 19, 2012 at 2:58 PM, Kleber Sacilotto de Souza
<klebers@linux.vnet.ibm.com> wrote:
>  void rdma_addr_unregister_client(struct rdma_addr_client *client)
>  {
>         put_client(client);
> -       wait_for_completion(&client->comp);
> +       wait_for_completion_timeout(&client->comp, 5*HZ);
>  }
>  EXPORT_SYMBOL(rdma_addr_unregister_client);

This type of change looks pretty risky to me.  Aren't we just letting
things continue with a potential use-after-free hanging around?

Where does 5 seconds as a timeout come from?  Can we be confident that
nothing will complete client->comp after 5 seconds?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean Nov. 5, 2012, 6:21 p.m. UTC | #7
> >   drivers/infiniband/core/ucm.c         |    2 +-
> >   drivers/infiniband/core/ucma.c        |    2 +-
> 
> On these files, as far as I understand this code from quick looking, I'm
> not sure on what
> exactly the completion objects protects, Sean?

These protect against the caller trying to destroy an object that is in use.  For example, it handles the case where the user calls 'connect' on a given context, then calls 'destroy' on that same context from another thread.

We shouldn't need to deal with any timeouts at this level.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 5, 2012, 6:38 p.m. UTC | #8
On Mon, Nov 5, 2012 at 7:21 PM, Hefty, Sean <sean.hefty@intel.com> wrote:
>
> > >   drivers/infiniband/core/ucm.c         |    2 +-
> > >   drivers/infiniband/core/ucma.c        |    2 +-
> >
> > On these files, as far as I understand this code from quick looking, I'm
> > not sure on what
> > exactly the completion objects protects, Sean?
>
> These protect against the caller trying to destroy an object that is in
> use.  For example, it handles the case where the user calls 'connect' on a
> given context, then calls 'destroy' on that same context from another
> thread.
>
> We shouldn't need to deal with any timeouts at this level.
>

OK, thanks, this was my thought as well
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hal Rosenstock Nov. 5, 2012, 7:15 p.m. UTC | #9
On 11/5/2012 1:21 PM, Hefty, Sean wrote:
>>>   drivers/infiniband/core/ucm.c         |    2 +-
>>>   drivers/infiniband/core/ucma.c        |    2 +-
>>
>> On these files, as far as I understand this code from quick looking, I'm
>> not sure on what
>> exactly the completion objects protects, Sean?
> 
> These protect against the caller trying to destroy an object that is in use.  For example, it handles the case where the user calls 'connect' on a given context, then calls 'destroy' on that same context from another thread.
> 
> We shouldn't need to deal with any timeouts at this level.

I believe there's some lower level issue which needs to be found and
fixed where some driver is not returning the completions for some reason.

-- Hal

> 
> - Sean
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kleber Sacilotto de Souza Nov. 5, 2012, 8:54 p.m. UTC | #10
On 11/05/2012 05:15 PM, Hal Rosenstock wrote:
> On 11/5/2012 1:21 PM, Hefty, Sean wrote:
>>>>   drivers/infiniband/core/ucm.c         |    2 +-
>>>>   drivers/infiniband/core/ucma.c        |    2 +-
>>> On these files, as far as I understand this code from quick looking, I'm
>>> not sure on what
>>> exactly the completion objects protects, Sean?
>> These protect against the caller trying to destroy an object that is in use.  For example, it handles the case where the user calls 'connect' on a given context, then calls 'destroy' on that same context from another thread.
>>
>> We shouldn't need to deal with any timeouts at this level.
> I believe there's some lower level issue which needs to be found and
> fixed where some driver is not returning the completions for some reason.

The driver is not returning the completions because during EEH (Extended
Error Handling) recovery on powerpc systems the PCI slot is frozen, and
we are not going to receive any interrupt from the adapter until we
remove it completely and reset it.

Kleber

>
> -- Hal
>
>> - Sean
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
Or Gerlitz Nov. 6, 2012, 5:30 a.m. UTC | #11
> On Mon, Nov 5, 2012 at 9:54 PM, Kleber Sacilotto de Souza
> <klebers@linux.vnet.ibm.com> wrote:
>>
>>
>> The driver is not returning the completions because during EEH (Extended
>> Error Handling) recovery on powerpc systems the PCI slot is frozen, and
>> we are not going to receive any interrupt from the adapter until we
>> remove it completely and reset it.
>>

maybe, but per Sean's comment this possible problem is irrelevant for ucm/ucma
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kleber Sacilotto de Souza Nov. 6, 2012, 10:58 a.m. UTC | #12
On 11/06/2012 03:30 AM, Or Gerlitz wrote:
>> On Mon, Nov 5, 2012 at 9:54 PM, Kleber Sacilotto de Souza
>> <klebers@linux.vnet.ibm.com> wrote:
>>>
>>> The driver is not returning the completions because during EEH (Extended
>>> Error Handling) recovery on powerpc systems the PCI slot is frozen, and
>>> we are not going to receive any interrupt from the adapter until we
>>> remove it completely and reset it.
>>>
> maybe, but per Sean's comment this possible problem is irrelevant for ucm/ucma
>
During my tests I've seen the wait_for_completion() call hanging on
different parts of the code, but not on ucm/ucma. So would it be OK to
change the other calls and leave the ucm/ucma as it is?

Thank you.
Or Gerlitz Nov. 6, 2012, 5:44 p.m. UTC | #13
On Tue, Nov 6, 2012 at 11:58 AM, Kleber Sacilotto de Souza
<klebers@linux.vnet.ibm.com> wrote:

> During my tests I've seen the wait_for_completion() call hanging on
> different parts of the code, but not on ucm/ucma. So would it be OK to
> change the other calls and leave the ucm/ucma as it is?

ucm/ucma you should remove in V1, I'm not fully sure on the uverbs
part, it needs more review,
the other parts are OK to me.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 6, 2012, 5:45 p.m. UTC | #14
On Tue, Nov 6, 2012 at 6:44 PM, Or Gerlitz <or.gerlitz@gmail.com> wrote:
> the other parts are OK to me.

I wanted to say that you stepped on real problem and provided a
solution, Roland's wondering is ofcourse
correct, how do we avoid use after free in very slow non pci hotunplug cases?!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index eaec8d7..be42113 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -86,7 +86,7 @@  static inline void put_client(struct rdma_addr_client *client)
 void rdma_addr_unregister_client(struct rdma_addr_client *client)
 {
 	put_client(client);
-	wait_for_completion(&client->comp);
+	wait_for_completion_timeout(&client->comp, 5*HZ);
 }
 EXPORT_SYMBOL(rdma_addr_unregister_client);
 
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 394fea2..062a90b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -910,7 +910,7 @@  retest:
 
 	cm_free_id(cm_id->local_id);
 	cm_deref_id(cm_id_priv);
-	wait_for_completion(&cm_id_priv->comp);
+	wait_for_completion_timeout(&cm_id_priv->comp, 5*HZ);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
 	kfree(cm_id_priv->compare_data);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a7568c3..c85d078 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -944,7 +944,7 @@  void rdma_destroy_id(struct rdma_cm_id *id)
 
 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
-	wait_for_completion(&id_priv->comp);
+	wait_for_completion_timeout(&id_priv->comp, 5*HZ);
 
 	if (id_priv->internal_id)
 		cma_deref_id(id_priv->id.context);
@@ -3388,7 +3388,7 @@  static void cma_process_remove(struct cma_device *cma_dev)
 	mutex_unlock(&lock);
 
 	cma_deref_dev(cma_dev);
-	wait_for_completion(&cma_dev->comp);
+	wait_for_completion_timeout(&cma_dev->comp, 5*HZ);
 }
 
 static void cma_remove_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 0bb99bb..7457195 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -399,7 +399,7 @@  void iw_destroy_cm_id(struct iw_cm_id *cm_id)
 
 	destroy_cm_id(cm_id);
 
-	wait_for_completion(&cm_id_priv->destroy_comp);
+	wait_for_completion_timeout(&cm_id_priv->destroy_comp, 5*HZ);
 
 	free_cm_id(cm_id_priv);
 }
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index dc3fd1e..bf94a67 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -539,7 +539,7 @@  static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	ib_cancel_rmpp_recvs(mad_agent_priv);
 
 	deref_mad_agent(mad_agent_priv);
-	wait_for_completion(&mad_agent_priv->comp);
+	wait_for_completion_timeout(&mad_agent_priv->comp, 5*HZ);
 
 	kfree(mad_agent_priv->reg_req);
 	ib_dereg_mr(mad_agent_priv->agent.mr);
@@ -558,7 +558,7 @@  static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 
 	deref_snoop_agent(mad_snoop_priv);
-	wait_for_completion(&mad_snoop_priv->comp);
+	wait_for_completion_timeout(&mad_snoop_priv->comp, 5*HZ);
 
 	kfree(mad_snoop_priv);
 }
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index f37878c..562bf84 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -78,7 +78,7 @@  static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
 static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
 {
 	deref_rmpp_recv(rmpp_recv);
-	wait_for_completion(&rmpp_recv->comp);
+	wait_for_completion_timeout(&rmpp_recv->comp, 5*HZ);
 	ib_destroy_ah(rmpp_recv->ah);
 	kfree(rmpp_recv);
 }
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d2360a8..0cba0c6 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -684,7 +684,7 @@  void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
 	}
 
 	deref_member(member);
-	wait_for_completion(&member->comp);
+	wait_for_completion_timeout(&member->comp, 5*HZ);
 	ib_sa_client_put(member->client);
 	kfree(member);
 }
@@ -862,7 +862,7 @@  static void mcast_remove_one(struct ib_device *device)
 		    IB_LINK_LAYER_INFINIBAND) {
 			port = &dev->port[i];
 			deref_port(port);
-			wait_for_completion(&port->comp);
+			wait_for_completion_timeout(&port->comp, 5*HZ);
 		}
 	}
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a8905ab..25547fe 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -474,7 +474,7 @@  EXPORT_SYMBOL(ib_sa_register_client);
 void ib_sa_unregister_client(struct ib_sa_client *client)
 {
 	ib_sa_client_put(client);
-	wait_for_completion(&client->comp);
+	wait_for_completion_timeout(&client->comp, 5*HZ);
 }
 EXPORT_SYMBOL(ib_sa_unregister_client);
 
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 49b15ac..6358941 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -550,7 +550,7 @@  static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
 		return PTR_ERR(ctx);
 
 	ib_ucm_ctx_put(ctx);
-	wait_for_completion(&ctx->comp);
+	wait_for_completion_timeout(&ctx->comp, 5*HZ);
 
 	/* No new events will be generated after destroying the cm_id. */
 	ib_destroy_cm_id(ctx->cm_id);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2709ff5..4b4eb29 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -516,7 +516,7 @@  static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 		return PTR_ERR(ctx);
 
 	ucma_put_ctx(ctx);
-	wait_for_completion(&ctx->comp);
+	wait_for_completion_timeout(&ctx->comp, 5*HZ);
 	resp.events_reported = ucma_free_ctx(ctx);
 
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6f2ce6f..0824b92 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -819,7 +819,7 @@  err_cdev:
 
 err:
 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
-	wait_for_completion(&uverbs_dev->comp);
+	wait_for_completion_timeout(&uverbs_dev->comp, 5*HZ);
 	kfree(uverbs_dev);
 	return;
 }
@@ -841,7 +841,7 @@  static void ib_uverbs_remove_one(struct ib_device *device)
 		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
 
 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
-	wait_for_completion(&uverbs_dev->comp);
+	wait_for_completion_timeout(&uverbs_dev->comp, 5*HZ);
 	kfree(uverbs_dev);
 }