
IB/cm: Do not queue a work when the device is going to be removed

Message ID 1435241602-12104-1-git-send-email-erezsh@mellanox.com (mailing list archive)
State Accepted

Commit Message

Erez Shitrit June 25, 2015, 2:13 p.m. UTC
Whenever ib_cm gets a remove_one call, such as when there is a hot-unplug
event, the driver should mark itself as going_down and ensure that no
new work is queued for that device.
So, the order of actions is:
1. Mark the going_down bit.
2. Flush the wq.
3. [Make sure no new work is queued for that device.]
4. Unregister the mad agent.

Otherwise, work that is already queued can be scheduled after the mad
agent has been freed.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
---
 drivers/infiniband/core/cm.c | 61 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 6 deletions(-)
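
In code terms, the reworked teardown and the queue-side check look roughly
like the sketch below. It is condensed from the patch at the bottom of this
page and assumes kernel context, so it is not compilable on its own; the
helper name cm_queue_work_checked() is not in the patch and only condenses
the pattern repeated in cm_enter_timewait(), cm_establish() and
cm_recv_handler().

/* Condensed sketch of the new cm_remove_one() ordering (see full patch below). */
static void cm_remove_one(struct ib_device *ib_device)
{
	struct cm_device *cm_dev = ib_get_client_data(ib_device, &cm_client);
	struct cm_port *port;
	int i;

	/* 1. Mark the device as going down so no new work gets queued. */
	spin_lock_irq(&cm.lock);
	cm_dev->going_down = 1;
	spin_unlock_irq(&cm.lock);

	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
		if (!rdma_cap_ib_cm(ib_device, i))
			continue;
		port = cm_dev->port[i - 1];

		/* 2. Drain work queued before going_down was set ... */
		flush_workqueue(cm.wq);
		/* 3. ... so that unregistering the MAD agent is safe. */
		ib_unregister_mad_agent(port->mad_agent);
		cm_remove_port_fs(port);
	}
}

/*
 * Queue-side check (hypothetical helper, not in the patch): only queue work
 * while the device is not going down, under cm.lock.
 */
static int cm_queue_work_checked(struct cm_device *cm_dev, struct cm_work *work)
{
	int ret = 0;

	spin_lock_irq(&cm.lock);
	if (!cm_dev->going_down)
		queue_delayed_work(cm.wq, &work->work, 0);
	else
		ret = -ENODEV;	/* caller must free the work item instead */
	spin_unlock_irq(&cm.lock);

	return ret;
}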

Comments

Bart Van Assche June 25, 2015, 3:51 p.m. UTC | #1
On 06/25/2015 07:13 AM, Erez Shitrit wrote:
> @@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device *ib_device)
>   	list_del(&cm_dev->list);
>   	write_unlock_irqrestore(&cm.device_lock, flags);
>
> +	spin_lock_irq(&cm.lock);
> +	cm_dev->going_down = 1;
> +	spin_unlock_irq(&cm.lock);
> +
>   	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
>   		if (!rdma_cap_ib_cm(ib_device, i))
>   			continue;
>
>   		port = cm_dev->port[i-1];
>   		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
> -		ib_unregister_mad_agent(port->mad_agent);
> +		/*
> +		 * We flush the queue here after setting going_down; this
> +		 * ensures that no new work will be queued in the recv handler,
> +		 * and after that we can call ib_unregister_mad_agent().
> +		 */
>   		flush_workqueue(cm.wq);
> +		ib_unregister_mad_agent(port->mad_agent);
>   		cm_remove_port_fs(port);
>   	}
>   	device_unregister(cm_dev->device);

Hello Erez,

How about splitting unregister_mad_agent() into two functions, one that
stops the invocation of the receive callbacks and another one that
cancels all sends? If the new function that stops the receive callbacks
were invoked before flush_workqueue(), would that be safe? Would
that allow dropping the new flag "going_down", since the workqueue
implementation already sets __WQ_DRAINING?

Thanks,

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Erez Shitrit June 26, 2015, 11:07 a.m. UTC | #2
On Thu, Jun 25, 2015 at 6:51 PM, Bart Van Assche
<bart.vanassche@sandisk.com> wrote:
> On 06/25/2015 07:13 AM, Erez Shitrit wrote:
>>
>> @@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device
>> *ib_device)
>>         list_del(&cm_dev->list);
>>         write_unlock_irqrestore(&cm.device_lock, flags);
>>
>> +       spin_lock_irq(&cm.lock);
>> +       cm_dev->going_down = 1;
>> +       spin_unlock_irq(&cm.lock);
>> +
>>         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
>>                 if (!rdma_cap_ib_cm(ib_device, i))
>>                         continue;
>>
>>                 port = cm_dev->port[i-1];
>>                 ib_modify_port(ib_device, port->port_num, 0,
>> &port_modify);
>> -               ib_unregister_mad_agent(port->mad_agent);
>> +               /*
>> +                * We flush the queue here after setting going_down; this
>> +                * ensures that no new work will be queued in the recv handler,
>> +                * and after that we can call ib_unregister_mad_agent().
>> +                */
>>                 flush_workqueue(cm.wq);
>> +               ib_unregister_mad_agent(port->mad_agent);
>>                 cm_remove_port_fs(port);
>>         }
>>         device_unregister(cm_dev->device);
>
>
> Hello Erez,
>
> How about splitting unregister_mad_agent() into two functions, one that
> stops the invocation of the receive callbacks and another one that
> cancels all sends? If the new function that stops the receive callbacks
> were invoked before flush_workqueue(), would that be safe?
No, work that is already pending in the queue will still need the mad_agent.
The best approach is to finish all the pending work and not let new work
come in.
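
The hazard with the original ordering (unregister first, flush second) can be
sketched as the following hypothetical interleaving; cm_work_handler() here
stands for the cm.wq work function, which may still dereference the port's
mad_agent:

/*
 * Old ordering: ib_unregister_mad_agent() runs before flush_workqueue().
 *
 *   CPU0: cm_remove_one()               CPU1: cm.wq worker
 *   ---------------------               ------------------------------
 *   ib_unregister_mad_agent(agent);
 *                                       cm_work_handler(work);
 *                                         -> touches port->mad_agent
 *                                            after it was torn down
 *   flush_workqueue(cm.wq);             (the drain comes too late)
 *
 * With going_down set first and flush_workqueue() run before
 * ib_unregister_mad_agent(), pending work finishes while the agent is
 * still valid and nothing new is queued afterwards.
 */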

> Would that allow dropping the new flag "going_down", since the workqueue
> implementation already sets __WQ_DRAINING?
>
> Thanks,
>
> Bart.
>
Doug Ledford July 9, 2015, 2:45 p.m. UTC | #3
On 06/25/2015 10:13 AM, Erez Shitrit wrote:
> Whenever ib_cm gets a remove_one call, such as when there is a hot-unplug
> event, the driver should mark itself as going_down and ensure that no
> new work is queued for that device.
> So, the order of actions is:
> 1. Mark the going_down bit.
> 2. Flush the wq.
> 3. [Make sure no new work is queued for that device.]
> 4. Unregister the mad agent.
> 
> Otherwise, work that is already queued can be scheduled after the mad
> agent has been freed.
> 
> Signed-off-by: Erez Shitrit <erezsh@mellanox.com>

Thanks, applied.

Patch

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index dbddddd..3a972eb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -169,6 +169,7 @@  struct cm_device {
 	struct ib_device *ib_device;
 	struct device *device;
 	u8 ack_delay;
+	int going_down;
 	struct cm_port *port[0];
 };
 
@@ -805,6 +806,11 @@  static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
 	int wait_time;
 	unsigned long flags;
+	struct cm_device *cm_dev;
+
+	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
+	if (!cm_dev)
+		return;
 
 	spin_lock_irqsave(&cm.lock, flags);
 	cm_cleanup_timewait(cm_id_priv->timewait_info);
@@ -818,8 +824,14 @@  static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 	 */
 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
 	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
-	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
-			   msecs_to_jiffies(wait_time));
+
+	/* Check if the device started its remove_one */
+	spin_lock_irq(&cm.lock);
+	if (!cm_dev->going_down)
+		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
+				   msecs_to_jiffies(wait_time));
+	spin_unlock_irq(&cm.lock);
+
 	cm_id_priv->timewait_info = NULL;
 }
 
@@ -3305,6 +3317,11 @@  static int cm_establish(struct ib_cm_id *cm_id)
 	struct cm_work *work;
 	unsigned long flags;
 	int ret = 0;
+	struct cm_device *cm_dev;
+
+	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
+	if (!cm_dev)
+		return -ENODEV;
 
 	work = kmalloc(sizeof *work, GFP_ATOMIC);
 	if (!work)
@@ -3343,7 +3360,17 @@  static int cm_establish(struct ib_cm_id *cm_id)
 	work->remote_id = cm_id->remote_id;
 	work->mad_recv_wc = NULL;
 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
-	queue_delayed_work(cm.wq, &work->work, 0);
+
+	/* Check if the device started its remove_one */
+	spin_lock_irq(&cm.lock);
+	if (!cm_dev->going_down) {
+		queue_delayed_work(cm.wq, &work->work, 0);
+	} else {
+		kfree(work);
+		ret = -ENODEV;
+	}
+	spin_unlock_irq(&cm.lock);
+
 out:
 	return ret;
 }
@@ -3394,6 +3421,7 @@  static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 	enum ib_cm_event_type event;
 	u16 attr_id;
 	int paths = 0;
+	int going_down = 0;
 
 	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
 	case CM_REQ_ATTR_ID:
@@ -3452,7 +3480,19 @@  static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 	work->cm_event.event = event;
 	work->mad_recv_wc = mad_recv_wc;
 	work->port = port;
-	queue_delayed_work(cm.wq, &work->work, 0);
+
+	/* Check if the device started its remove_one */
+	spin_lock_irq(&cm.lock);
+	if (!port->cm_dev->going_down)
+		queue_delayed_work(cm.wq, &work->work, 0);
+	else
+		going_down = 1;
+	spin_unlock_irq(&cm.lock);
+
+	if (going_down) {
+		kfree(work);
+		ib_free_recv_mad(mad_recv_wc);
+	}
 }
 
 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
@@ -3771,7 +3811,7 @@  static void cm_add_one(struct ib_device *ib_device)
 
 	cm_dev->ib_device = ib_device;
 	cm_get_ack_delay(cm_dev);
-
+	cm_dev->going_down = 0;
 	cm_dev->device = device_create(&cm_class, &ib_device->dev,
 				       MKDEV(0, 0), NULL,
 				       "%s", ib_device->name);
@@ -3864,14 +3904,23 @@  static void cm_remove_one(struct ib_device *ib_device)
 	list_del(&cm_dev->list);
 	write_unlock_irqrestore(&cm.device_lock, flags);
 
+	spin_lock_irq(&cm.lock);
+	cm_dev->going_down = 1;
+	spin_unlock_irq(&cm.lock);
+
 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
 		if (!rdma_cap_ib_cm(ib_device, i))
 			continue;
 
 		port = cm_dev->port[i-1];
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
-		ib_unregister_mad_agent(port->mad_agent);
+		/*
+		 * We flush the queue here after setting going_down; this
+		 * ensures that no new work will be queued in the recv handler,
+		 * and after that we can call ib_unregister_mad_agent().
+		 */
 		flush_workqueue(cm.wq);
+		ib_unregister_mad_agent(port->mad_agent);
 		cm_remove_port_fs(port);
 	}
 	device_unregister(cm_dev->device);