diff mbox series

iwcm: don't hold the irq disabled lock on iw_rem_ref

Message ID 20190903192223.17342-1-sagi@grimberg.me (mailing list archive)
State Superseded
Headers show
Series iwcm: don't hold the irq disabled lock on iw_rem_ref | expand

Commit Message

Sagi Grimberg Sept. 3, 2019, 7:22 p.m. UTC
This may be the final put on a qp and result in freeing
resources, and so should not be done with interrupts disabled.

Produces the following warning:
--
[  317.026048] WARNING: CPU: 1 PID: 443 at kernel/smp.c:425 smp_call_function_many+0xa0/0x260
[  317.026131] Call Trace:
[  317.026159]  ? load_new_mm_cr3+0xe0/0xe0
[  317.026161]  on_each_cpu+0x28/0x50
[  317.026183]  __purge_vmap_area_lazy+0x72/0x150
[  317.026200]  free_vmap_area_noflush+0x7a/0x90
[  317.026202]  remove_vm_area+0x6f/0x80
[  317.026203]  __vunmap+0x71/0x210
[  317.026211]  siw_free_qp+0x8d/0x130 [siw]
[  317.026217]  destroy_cm_id+0xc3/0x200 [iw_cm]
[  317.026222]  rdma_destroy_id+0x224/0x2b0 [rdma_cm]
[  317.026226]  nvme_rdma_reset_ctrl_work+0x2c/0x70 [nvme_rdma]
[  317.026235]  process_one_work+0x1f4/0x3e0
[  317.026249]  worker_thread+0x221/0x3e0
[  317.026252]  ? process_one_work+0x3e0/0x3e0
[  317.026256]  kthread+0x117/0x130
[  317.026264]  ? kthread_create_worker_on_cpu+0x70/0x70
[  317.026275]  ret_from_fork+0x35/0x40
--

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/infiniband/core/iwcm.c | 2 ++
 1 file changed, 2 insertions(+)

Comments

Mark Bloch Sept. 3, 2019, 7:33 p.m. UTC | #1
On 9/3/19 12:22 PM, Sagi Grimberg wrote:
> This may be the final put on a qp and result in freeing
> resourcesand should not be done with interrupts disabled.
> 
> Produce the following warning:
> --
> [  317.026048] WARNING: CPU: 1 PID: 443 at kernel/smp.c:425 smp_call_function_many+0xa0/0x260
> [  317.026131] Call Trace:
> [  317.026159]  ? load_new_mm_cr3+0xe0/0xe0
> [  317.026161]  on_each_cpu+0x28/0x50
> [  317.026183]  __purge_vmap_area_lazy+0x72/0x150
> [  317.026200]  free_vmap_area_noflush+0x7a/0x90
> [  317.026202]  remove_vm_area+0x6f/0x80
> [  317.026203]  __vunmap+0x71/0x210
> [  317.026211]  siw_free_qp+0x8d/0x130 [siw]
> [  317.026217]  destroy_cm_id+0xc3/0x200 [iw_cm]
> [  317.026222]  rdma_destroy_id+0x224/0x2b0 [rdma_cm]
> [  317.026226]  nvme_rdma_reset_ctrl_work+0x2c/0x70 [nvme_rdma]
> [  317.026235]  process_one_work+0x1f4/0x3e0
> [  317.026249]  worker_thread+0x221/0x3e0
> [  317.026252]  ? process_one_work+0x3e0/0x3e0
> [  317.026256]  kthread+0x117/0x130
> [  317.026264]  ? kthread_create_worker_on_cpu+0x70/0x70
> [  317.026275]  ret_from_fork+0x35/0x40
> --
> 
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---
>  drivers/infiniband/core/iwcm.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
> index 72141c5b7c95..94566271dbff 100644
> --- a/drivers/infiniband/core/iwcm.c
> +++ b/drivers/infiniband/core/iwcm.c
> @@ -427,7 +427,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
>  		break;
>  	}
>  	if (cm_id_priv->qp) {
> +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
>  		cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
> +		spin_lock_irqsave(&cm_id_priv->lock, flags);
>  		cm_id_priv->qp = NULL;

Shouldn't you first do cm_id_priv->qp = NULL and only then
unlock and destroy the qp?

Mark
>  	}
>  	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
>
Sagi Grimberg Sept. 4, 2019, 1:07 a.m. UTC | #2
>> This may be the final put on a qp and result in freeing
>> resourcesand should not be done with interrupts disabled.
>>
>> Produce the following warning:
>> --
>> [  317.026048] WARNING: CPU: 1 PID: 443 at kernel/smp.c:425 smp_call_function_many+0xa0/0x260
>> [  317.026131] Call Trace:
>> [  317.026159]  ? load_new_mm_cr3+0xe0/0xe0
>> [  317.026161]  on_each_cpu+0x28/0x50
>> [  317.026183]  __purge_vmap_area_lazy+0x72/0x150
>> [  317.026200]  free_vmap_area_noflush+0x7a/0x90
>> [  317.026202]  remove_vm_area+0x6f/0x80
>> [  317.026203]  __vunmap+0x71/0x210
>> [  317.026211]  siw_free_qp+0x8d/0x130 [siw]
>> [  317.026217]  destroy_cm_id+0xc3/0x200 [iw_cm]
>> [  317.026222]  rdma_destroy_id+0x224/0x2b0 [rdma_cm]
>> [  317.026226]  nvme_rdma_reset_ctrl_work+0x2c/0x70 [nvme_rdma]
>> [  317.026235]  process_one_work+0x1f4/0x3e0
>> [  317.026249]  worker_thread+0x221/0x3e0
>> [  317.026252]  ? process_one_work+0x3e0/0x3e0
>> [  317.026256]  kthread+0x117/0x130
>> [  317.026264]  ? kthread_create_worker_on_cpu+0x70/0x70
>> [  317.026275]  ret_from_fork+0x35/0x40
>> --
>>
>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>> ---
>>   drivers/infiniband/core/iwcm.c | 2 ++
>>   1 file changed, 2 insertions(+)
>>
>> diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
>> index 72141c5b7c95..94566271dbff 100644
>> --- a/drivers/infiniband/core/iwcm.c
>> +++ b/drivers/infiniband/core/iwcm.c
>> @@ -427,7 +427,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
>>   		break;
>>   	}
>>   	if (cm_id_priv->qp) {
>> +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
>>   		cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
>> +		spin_lock_irqsave(&cm_id_priv->lock, flags);
>>   		cm_id_priv->qp = NULL;
> 
> Shouldn't you first do cm_id_priv->qp = NULL and only then
> unlock and destroy the qp?

Probably...
Jason Gunthorpe Sept. 4, 2019, 6:03 a.m. UTC | #3
On Tue, Sep 03, 2019 at 07:33:08PM +0000, Mark Bloch wrote:
> 
> 
> On 9/3/19 12:22 PM, Sagi Grimberg wrote:
> > This may be the final put on a qp and result in freeing
> > resourcesand should not be done with interrupts disabled.
> > 
> > Produce the following warning:
> 
> Shouldn't you first do cm_id_priv->qp = NULL and only then
> unlock and destroy the qp?
> 
> Mark
> >  	}
> >  	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

Would avoid the sketchy unlock too.. But does it work?

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..94566271dbff 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -427,7 +427,9 @@  static void destroy_cm_id(struct iw_cm_id *cm_id)
 		break;
 	}
 	if (cm_id_priv->qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		cm_id_priv->qp = NULL;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);