diff mbox series

[v2] scsi: iscsi: Fix multiple iscsi session unbind event sent to userspace

Message ID 20220418000627.474784-1-haowenchao@huawei.com (mailing list archive)
State Changes Requested
Headers show
Series [v2] scsi: iscsi: Fix multiple iscsi session unbind event sent to userspace | expand

Commit Message

Wenchao Hao April 18, 2022, 12:06 a.m. UTC
I found an issue where the kernel sends ISCSI_KEVENT_UNBIND_SESSION
multiple times, which should be fixed.

This patch introduces target_unbound in iscsi_cls_session to make
sure a session sends only one ISCSI_KEVENT_UNBIND_SESSION.

But this would break the issue fixed in commit 13e60d3ba287 ("scsi: iscsi:
Report unbind session event when the target has been removed"). The issue
is that if iscsid dies for any reason after it sends unbind session to the
kernel, then once iscsid restarts it has lost the kernel's
ISCSI_KEVENT_UNBIND_SESSION event.

Now the kernel thinks the iscsi_cls_session has already sent an
ISCSI_KEVENT_UNBIND_SESSION event and will not send it again, which
would leave userspace unable to log out. Actually the session is in an
invalid state (its target_id is INVALID), so iscsid should not sync this
session on restart.

So we need to check the session's target unbound state during iscsid
restart: if the session is in the unbound state, do not sync it; perform
session teardown instead. This is reasonable because once a session is
unbound, we cannot recover it any more (mainly because its target id is
INVALID).

Changes from V1:
- Using target_unbound rather than state to indicate session has been
  unbound

Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 21 +++++++++++++++++++++
 include/scsi/scsi_transport_iscsi.h |  1 +
 2 files changed, 22 insertions(+)

Comments

Mike Christie April 20, 2022, 4:28 p.m. UTC | #1
On 4/17/22 7:06 PM, Wenchao Hao wrote:
> I found an issue that kernel would send ISCSI_KEVENT_UNBIND_SESSION
> for multiple times which should be fixed.
> 
> This patch introduce target_unbound in iscsi_cls_session to make
> sure session would send only one ISCSI_KEVENT_UNBIND_SESSION.
> 
> But this would break issue fixed in commit 13e60d3ba287 ("scsi: iscsi:
> Report unbind session event when the target has been removed"). The issue
> is iscsid died for any reason after it send unbind session to kernel, once
> iscsid restart again, it loss kernel's ISCSI_KEVENT_UNBIND_SESSION event.
> 
> Now kernel think iscsi_cls_session has already sent an
> ISCSI_KEVENT_UNBIND_SESSION event and would not send it any more. Which
> would cause userspace unable to logout. Actually the session is in
> invalid state(it's target_id is INVALID), iscsid should not sync this
> session in it's restart.
> 
> So we need to check session's target unbound state during iscsid restart,
> if session is in unbound state, do not sync this session and perform
> session teardown. It's reasonable because once a session is unbound, we
> can not recover it any more(mainly because it's target id is INVALID)
> 
> Changes from V1:
> - Using target_unbound rather than state to indicate session has been
>   unbound
> 
> Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
> ---
>  drivers/scsi/scsi_transport_iscsi.c | 21 +++++++++++++++++++++
>  include/scsi/scsi_transport_iscsi.h |  1 +
>  2 files changed, 22 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
> index 2c0dd64159b0..43ba31e595b4 100644
> --- a/drivers/scsi/scsi_transport_iscsi.c
> +++ b/drivers/scsi/scsi_transport_iscsi.c
> @@ -1958,6 +1958,14 @@ static void __iscsi_unbind_session(struct work_struct *work)
>  
>  	ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
>  
> +	spin_lock_irqsave(&session->lock, flags);
> +	if (session->target_unbound) {
> +		spin_unlock_irqrestore(&session->lock, flags);
> +		return;
> +	}
> +	session->target_unbound = 1;

Shoot, sorry, I think I gave you a bad review comment when I said we
could do a bool or state kind of variable.

If we set unbound here and iscsid was restarting at this point then
iscsid really only knows the target removal process is starting up. It
doesn't know that the target is not yet removed. We could be doing sync
caches and/or still tearing down scsi_devices/LUNs.

For the comments I gave you on the userspace PR parts, would it be
easier if this was a state type of value? Above you would set it to
REMOVING. When scsi_remove_target is done then we can set it to
REMOVED. That combined with the session and conn states we can detect
how far we got in the session removal process if iscsid dies in the
middle of it.

What do you think?


> +	spin_unlock_irqrestore(&session->lock, flags);
> +
>  	/* Prevent new scans and make sure scanning is not in progress */
>  	mutex_lock(&ihost->mutex);
>  	spin_lock_irqsave(&session->lock, flags)

...

> diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
> index 9acb8422f680..877632c25e56 100644
> --- a/include/scsi/scsi_transport_iscsi.h
> +++ b/include/scsi/scsi_transport_iscsi.h
> @@ -256,6 +256,7 @@ struct iscsi_cls_session {
>  	struct workqueue_struct *workq;
>  
>  	unsigned int target_id;
> +	int target_unbound;   /* make sure unbind session only once */


We don't need the comment since the code using this is so simple
and the name of the variable tells us what it's for.


>  	bool ida_used;
>  
>  	/*
Ulrich Windl April 21, 2022, 6:11 a.m. UTC | #2
>>> Mike Christie <michael.christie@oracle.com> schrieb am 20.04.2022 um 18:28 in
Nachricht <938bca13-2dcc-24c0-51b5-26f7e7238776@oracle.com>:

...
> 
>> diff --git a/include/scsi/scsi_transport_iscsi.h 
> b/include/scsi/scsi_transport_iscsi.h
>> index 9acb8422f680..877632c25e56 100644
>> --- a/include/scsi/scsi_transport_iscsi.h
>> +++ b/include/scsi/scsi_transport_iscsi.h
>> @@ -256,6 +256,7 @@ struct iscsi_cls_session {
>>  	struct workqueue_struct *workq;
>>  
>>  	unsigned int target_id;
>> +	int target_unbound;   /* make sure unbind session only once */
> 
> 
> We don't need the comment since the code using this is so simple
> and the name of the variable tells us what it's for.

Actually I think a comment may be worth it, but it should say what the variable expresses, not what it is used for
(the use may change, but hopefully not the semantics (unless updated globally)).
So maybe: /* is target unbound? */
(the question mark emphasizing that it is a boolean type of variable)
But still, if the name is mostly identical to the comment, one may leave out the comment.

Regards,
Ulrich

> 
> 
>>  	bool ida_used;
>>  
>>  	/*
> 
> -- 
> You received this message because you are subscribed to the Google Groups 
> "open-iscsi" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to open-iscsi+unsubscribe@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/open-iscsi/938bca13-2dcc-24c0-51b5-26f7e723 
> 8776%40oracle.com.
Wenchao Hao April 21, 2022, 3:18 p.m. UTC | #3
On 2022/4/21 0:28, Mike Christie wrote:
> On 4/17/22 7:06 PM, Wenchao Hao wrote:
>> I found an issue that kernel would send ISCSI_KEVENT_UNBIND_SESSION
>> for multiple times which should be fixed.
>>
>> This patch introduce target_unbound in iscsi_cls_session to make
>> sure session would send only one ISCSI_KEVENT_UNBIND_SESSION.
>>
>> But this would break issue fixed in commit 13e60d3ba287 ("scsi: iscsi:
>> Report unbind session event when the target has been removed"). The issue
>> is iscsid died for any reason after it send unbind session to kernel, once
>> iscsid restart again, it loss kernel's ISCSI_KEVENT_UNBIND_SESSION event.
>>
>> Now kernel think iscsi_cls_session has already sent an
>> ISCSI_KEVENT_UNBIND_SESSION event and would not send it any more. Which
>> would cause userspace unable to logout. Actually the session is in
>> invalid state(it's target_id is INVALID), iscsid should not sync this
>> session in it's restart.
>>
>> So we need to check session's target unbound state during iscsid restart,
>> if session is in unbound state, do not sync this session and perform
>> session teardown. It's reasonable because once a session is unbound, we
>> can not recover it any more(mainly because it's target id is INVALID)
>>
>> Changes from V1:
>> - Using target_unbound rather than state to indicate session has been
>>   unbound
>>
>> Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
>> ---
>>  drivers/scsi/scsi_transport_iscsi.c | 21 +++++++++++++++++++++
>>  include/scsi/scsi_transport_iscsi.h |  1 +
>>  2 files changed, 22 insertions(+)
>>
>> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
>> index 2c0dd64159b0..43ba31e595b4 100644
>> --- a/drivers/scsi/scsi_transport_iscsi.c
>> +++ b/drivers/scsi/scsi_transport_iscsi.c
>> @@ -1958,6 +1958,14 @@ static void __iscsi_unbind_session(struct work_struct *work)
>>  
>>  	ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
>>  
>> +	spin_lock_irqsave(&session->lock, flags);
>> +	if (session->target_unbound) {
>> +		spin_unlock_irqrestore(&session->lock, flags);
>> +		return;
>> +	}
>> +	session->target_unbound = 1;
> 
> Shoot, sorry, I think I gave you a bad review comment when I said we
> could do a bool or state kind or variable.
> 
> If we set unbound here and iscsid was restarting at this point then
> iscsid really only knows the target removal process is starting up. It
> doesn't know that the target is not yet removed. We could be doing sync
> caches and/or still tearing down scsi_devices/LUNs.
> 
> For the comments I gave you on the userspace PR parts, would it be
> easier if this was a state type of value? Above you would set it to
> REMOVING. When scsi_remove_target is done then we can set it to
> REMOVED. That combined with the session and conn states we can detect
> how far we got in the session removal process if iscsid dies in the
> middle of it.
> 
> What do you think?
> 

I thought about setting this bool to true after ISCSI_KEVENT_UNBIND_SESSION has
been sent in __iscsi_unbind_session(), but that is not a good approach either:
the session sync and the target unbind could run concurrently.

If we need to make sure iscsid calls into session_conn_shutdown() only after the
kernel's scsi_remove_target() has finished, we must make it a state type.

We need to think about how to set the initial value of this state. Since we only
care about the removing state, the easiest way is to set it to INITED when
allocating the session. When iscsid restarts and finds it is INITED, it still
syncs this session.

Based on your REMOVING and REMOVED states, the state is set to REMOVING at the
beginning of __iscsi_unbind_session() and set to REMOVED after
scsi_remove_target() is done. When iscsid restarts and finds this state is
REMOVING, it does nothing and just waits for the ISCSI_KEVENT_UNBIND_SESSION
event. If the state is REMOVED, it should start shutting down (checking both
session and conn state, as you mentioned in my PR).

> 
>> +	spin_unlock_irqrestore(&session->lock, flags);
>> +
>>  	/* Prevent new scans and make sure scanning is not in progress */
>>  	mutex_lock(&ihost->mutex);
>>  	spin_lock_irqsave(&session->lock, flags)
> 
> ...
> 
>> diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
>> index 9acb8422f680..877632c25e56 100644
>> --- a/include/scsi/scsi_transport_iscsi.h
>> +++ b/include/scsi/scsi_transport_iscsi.h
>> @@ -256,6 +256,7 @@ struct iscsi_cls_session {
>>  	struct workqueue_struct *workq;
>>  
>>  	unsigned int target_id;
>> +	int target_unbound;   /* make sure unbind session only once */
> 
> 
> We don't need the comment since the code using this is so simple
> and the name of the variable tells us what it's for.
> 
> 
>>  	bool ida_used;
>>  
>>  	/*
> 
> .
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2c0dd64159b0..43ba31e595b4 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1958,6 +1958,14 @@  static void __iscsi_unbind_session(struct work_struct *work)
 
 	ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
 
+	spin_lock_irqsave(&session->lock, flags);
+	if (session->target_unbound) {
+		spin_unlock_irqrestore(&session->lock, flags);
+		return;
+	}
+	session->target_unbound = 1;
+	spin_unlock_irqrestore(&session->lock, flags);
+
 	/* Prevent new scans and make sure scanning is not in progress */
 	mutex_lock(&ihost->mutex);
 	spin_lock_irqsave(&session->lock, flags);
@@ -2058,6 +2066,7 @@  int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id)
 		session->target_id = target_id;
 
 	dev_set_name(&session->dev, "session%u", session->sid);
+	session->target_unbound = 0;
 	err = device_add(&session->dev);
 	if (err) {
 		iscsi_cls_session_printk(KERN_ERR, session,
@@ -4319,6 +4328,15 @@  iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0);
 iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0);
 iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0);
 
+static ssize_t
+show_priv_session_target_unbound(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
+	return sysfs_emit(buf, "%d\n", session->target_unbound);
+}
+static ISCSI_CLASS_ATTR(priv_sess, target_unbound, S_IRUGO,
+			show_priv_session_target_unbound, NULL);
 static ssize_t
 show_priv_session_state(struct device *dev, struct device_attribute *attr,
 			char *buf)
@@ -4422,6 +4440,7 @@  static struct attribute *iscsi_session_attrs[] = {
 	&dev_attr_priv_sess_recovery_tmo.attr,
 	&dev_attr_priv_sess_state.attr,
 	&dev_attr_priv_sess_creator.attr,
+	&dev_attr_priv_sess_target_unbound.attr,
 	&dev_attr_sess_chap_out_idx.attr,
 	&dev_attr_sess_chap_in_idx.attr,
 	&dev_attr_priv_sess_target_id.attr,
@@ -4534,6 +4553,8 @@  static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
 		return S_IRUGO | S_IWUSR;
 	else if (attr == &dev_attr_priv_sess_state.attr)
 		return S_IRUGO;
+	else if (attr == &dev_attr_priv_sess_target_unbound.attr)
+		return S_IRUGO;
 	else if (attr == &dev_attr_priv_sess_creator.attr)
 		return S_IRUGO;
 	else if (attr == &dev_attr_priv_sess_target_id.attr)
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 9acb8422f680..877632c25e56 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -256,6 +256,7 @@  struct iscsi_cls_session {
 	struct workqueue_struct *workq;
 
 	unsigned int target_id;
+	int target_unbound;   /* make sure unbind session only once */
 	bool ida_used;
 
 	/*