diff mbox series

[v3,1/2] iscsi-target: fix login error when receiving is too fast

Message ID 20200424123528.17627-2-houpu@bytedance.com (mailing list archive)
State Changes Requested
Headers show
Series iscsi-target: fix login error when receiving is too fast | expand

Commit Message

Hou Pu April 24, 2020, 12:35 p.m. UTC
iscsi_target_sk_data_ready() could be invoked indirectly
by iscsi_target_do_login_rx() from workqueue like following:

iscsi_target_do_login_rx()
  iscsi_target_do_login()
    iscsi_target_do_tx_login_io()
      iscsit_put_login_tx()
        iscsi_login_tx_data()
          tx_data()
            sock_sendmsg_nosec()
              tcp_sendmsg()
                release_sock()
                  sk_backlog_rcv()
                    tcp_v4_do_rcv()
                      tcp_data_ready()
                        iscsi_target_sk_data_ready()

At that time LOGIN_FLAGS_READ_ACTIVE is not cleared,
and iscsi_target_sk_data_ready will not read data
from the socket. As a result, some iSCSI initiators (e.g. libiscsi)
will wait forever for a reply.

LOGIN_FLAGS_READ_ACTIVE should be cleared early just after
doing the receive and before writing to the socket in
iscsi_target_do_login_rx.

But sad news is that LOGIN_FLAGS_READ_ACTIVE is also used
by sk_state_change to do login cleanup if a socket was closed
at login time. It is supposed to be cleared after the login
pdu is successfully processed and replied.

So introduce another flag LOGIN_FLAGS_WRITE_ACTIVE to cover
the transmit part so that sk_state_change could work well
and clear LOGIN_FLAGS_READ_ACTIVE early so that sk_data_ready
could also work.

While at it, use login_flags more efficiently.

Signed-off-by: Hou Pu <houpu@bytedance.com>
---
 drivers/target/iscsi/iscsi_target_nego.c | 30 ++++++++++++++++++++++++++----
 include/target/iscsi/iscsi_target_core.h |  9 +++++----
 2 files changed, 31 insertions(+), 8 deletions(-)

Comments

Mike Christie April 24, 2020, 5:01 p.m. UTC | #1
On 4/24/20 7:35 AM, Hou Pu wrote:
> iscsi_target_sk_data_ready() could be invoked indirectly
> by iscsi_target_do_login_rx() from workqueue like following:
> 
> iscsi_target_do_login_rx()
>   iscsi_target_do_login()
>     iscsi_target_do_tx_login_io()
>       iscsit_put_login_tx()
>         iscsi_login_tx_data()
>           tx_data()
>             sock_sendmsg_nosec()
>               tcp_sendmsg()
>                 release_sock()
>                   sk_backlog_rcv()
>                     tcp_v4_do_rcv()
>                       tcp_data_ready()
>                         iscsi_target_sk_data_ready()
> 
> At that time LOGIN_FLAGS_READ_ACTIVE is not cleared.
> and iscsi_target_sk_data_ready will not read data
> from the socket. And some iscsi initiator(i.e. libiscsi)
> will wait forever for a reply.
> 
> LOGIN_FLAGS_READ_ACTIVE should be cleared early just after
> doing the receive and before writing to the socket in
> iscsi_target_do_login_rx.
> 
> But sad news is that LOGIN_FLAGS_READ_ACTIVE is also used
> by sk_state_change to do login cleanup if a socket was closed
> at login time. It is supposed to be cleared after the login
> pdu is successfully processed and replied.
> 
> So introduce another flag LOGIN_FLAGS_WRITE_ACTIVE to cover
> the transmit part so that sk_state_change could work well
> and clear LOGIN_FLAGS_READ_ACTIVE early so that sk_data_ready
> could also work.
> 
> While at here, use login_flags more efficient.
> 
> Signed-off-by: Hou Pu <houpu@bytedance.com>
> ---
>  drivers/target/iscsi/iscsi_target_nego.c | 30 ++++++++++++++++++++++++++----
>  include/target/iscsi/iscsi_target_core.h |  9 +++++----
>  2 files changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
> index 685d771b51d4..4cfa742e61df 100644
> --- a/drivers/target/iscsi/iscsi_target_nego.c
> +++ b/drivers/target/iscsi/iscsi_target_nego.c
> @@ -622,6 +622,26 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
>  	if (rc < 0)
>  		goto err;
>  
> +	/*
> +	 * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
> +	 * could be trigger again after this.
> +	 *
> +	 * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
> +	 * process a login pdu, so that sk_state_chage could do login
> +	 * cleanup as needed if the socket is closed. If a delayed work is
> +	 * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
> +	 * sk_state_change will leave the cleanup to the delayed work or
> +	 * it will schedule a delayed work to do cleanup.
> +	 */
> +	if (conn->sock) {
> +		struct sock *sk = conn->sock->sk;
> +
> +		write_lock_bh(&sk->sk_callback_lock);
> +		clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
> +		set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
> +		write_unlock_bh(&sk->sk_callback_lock);
> +	}
> +

Hey,

I had one more question.

With the above code, I think we have a race where if we clear the bit
above early and iscsi_target_sk_data_ready runs while
iscsi_target_do_login_rx is still running then we could queue the work
an extra time and get stuck. Because the bit is now not set, if
iscsi_target_sk_data_ready runs it will end up calling
schedule_delayed_work which will queue up the work again since the work
is running and not pending.

If that is correct and we hit the race what happens if this was the last
login pdu, and we are supposed to go to full feature phase next? For
example if iscsi_target_do_login_rx runs an extra time, will we end up
stuck waiting in iscsi_target_do_login_rx's call to:

rc = conn->conn_transport->iscsit_get_login_rx(conn, login);

?


>  	pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n",
>  			conn, current->comm, current->pid);
>  
> @@ -629,7 +649,8 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
>  	if (rc < 0) {
>  		goto err;
>  	} else if (!rc) {
> -		if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE))
> +		if (iscsi_target_sk_check_and_clear(conn,
> +						    LOGIN_FLAGS_WRITE_ACTIVE))
>  			goto err;
>  	} else if (rc == 1) {
>  		iscsi_target_nego_release(conn);
> @@ -670,9 +691,10 @@ static void iscsi_target_sk_state_change(struct sock *sk)
>  	state = __iscsi_target_sk_check_close(sk);
>  	pr_debug("__iscsi_target_sk_close_change: state: %d\n", state);
>  
> -	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
> -		pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change"
> -			 " conn: %p\n", conn);
> +	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags) ||
> +	    test_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags)) {
> +		pr_debug("Got LOGIN_FLAGS_{READ|WRITE}_ACTIVE=1"
> +			 " sk_state_change conn: %p\n", conn);
>  		if (state)
>  			set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
>  		write_unlock_bh(&sk->sk_callback_lock);
> diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
> index 591cd9e4692c..844bef255e89 100644
> --- a/include/target/iscsi/iscsi_target_core.h
> +++ b/include/target/iscsi/iscsi_target_core.h
> @@ -566,10 +566,11 @@ struct iscsi_conn {
>  	struct socket		*sock;
>  	void			(*orig_data_ready)(struct sock *);
>  	void			(*orig_state_change)(struct sock *);
> -#define LOGIN_FLAGS_READ_ACTIVE		1
> -#define LOGIN_FLAGS_CLOSED		2
> -#define LOGIN_FLAGS_READY		4
> -#define LOGIN_FLAGS_INITIAL_PDU		8
> +#define LOGIN_FLAGS_READY		0
> +#define LOGIN_FLAGS_INITIAL_PDU		1
> +#define LOGIN_FLAGS_READ_ACTIVE		2
> +#define LOGIN_FLAGS_WRITE_ACTIVE	3
> +#define LOGIN_FLAGS_CLOSED		4
>  	unsigned long		login_flags;
>  	struct delayed_work	login_work;
>  	struct iscsi_login	*login;
>
Mike Christie April 25, 2020, 5:45 p.m. UTC | #2
On 4/24/20 12:01 PM, Mike Christie wrote:
> On 4/24/20 7:35 AM, Hou Pu wrote:
>> iscsi_target_sk_data_ready() could be invoked indirectly
>> by iscsi_target_do_login_rx() from workqueue like following:
>>
>> iscsi_target_do_login_rx()
>>   iscsi_target_do_login()
>>     iscsi_target_do_tx_login_io()
>>       iscsit_put_login_tx()
>>         iscsi_login_tx_data()
>>           tx_data()
>>             sock_sendmsg_nosec()
>>               tcp_sendmsg()
>>                 release_sock()
>>                   sk_backlog_rcv()
>>                     tcp_v4_do_rcv()
>>                       tcp_data_ready()
>>                         iscsi_target_sk_data_ready()
>>
>> At that time LOGIN_FLAGS_READ_ACTIVE is not cleared.
>> and iscsi_target_sk_data_ready will not read data
>> from the socket. And some iscsi initiator(i.e. libiscsi)
>> will wait forever for a reply.
>>
>> LOGIN_FLAGS_READ_ACTIVE should be cleared early just after
>> doing the receive and before writing to the socket in
>> iscsi_target_do_login_rx.
>>
>> But sad news is that LOGIN_FLAGS_READ_ACTIVE is also used
>> by sk_state_change to do login cleanup if a socket was closed
>> at login time. It is supposed to be cleared after the login
>> pdu is successfully processed and replied.
>>
>> So introduce another flag LOGIN_FLAGS_WRITE_ACTIVE to cover
>> the transmit part so that sk_state_change could work well
>> and clear LOGIN_FLAGS_READ_ACTIVE early so that sk_data_ready
>> could also work.
>>
>> While at here, use login_flags more efficient.
>>
>> Signed-off-by: Hou Pu <houpu@bytedance.com>
>> ---
>>  drivers/target/iscsi/iscsi_target_nego.c | 30 ++++++++++++++++++++++++++----
>>  include/target/iscsi/iscsi_target_core.h |  9 +++++----
>>  2 files changed, 31 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
>> index 685d771b51d4..4cfa742e61df 100644
>> --- a/drivers/target/iscsi/iscsi_target_nego.c
>> +++ b/drivers/target/iscsi/iscsi_target_nego.c
>> @@ -622,6 +622,26 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
>>  	if (rc < 0)
>>  		goto err;
>>  
>> +	/*
>> +	 * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
>> +	 * could be trigger again after this.
>> +	 *
>> +	 * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
>> +	 * process a login pdu, so that sk_state_chage could do login
>> +	 * cleanup as needed if the socket is closed. If a delayed work is
>> +	 * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
>> +	 * sk_state_change will leave the cleanup to the delayed work or
>> +	 * it will schedule a delayed work to do cleanup.
>> +	 */
>> +	if (conn->sock) {
>> +		struct sock *sk = conn->sock->sk;
>> +
>> +		write_lock_bh(&sk->sk_callback_lock);
>> +		clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
>> +		set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
>> +		write_unlock_bh(&sk->sk_callback_lock);
>> +	}
>> +
> Hey,
> 
> I had one more question.
> 
> With the above code, I think we have a race where if we clear the bit
> above early and iscsi_target_sk_data_ready runs while
> iscsi_target_do_login_rx is still running then we could queue the work
> an extra time and get stuck. Because the bit is now not set, if
> iscsi_target_sk_data_ready runs it will end up calling
> schedule_delayed_work which will queue up the work again since the work
> is running and not pending.
> 
> If that is correct and we hit the race what happens if this was the last
> login pdu, and we are supposed to go to full feature phase next? For
> example if iscsi_target_do_login_rx runs an extra time, will we end up
> stuck waiting in iscsi_target_do_login_rx's call to:
> 
> rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
> 
> ?

Just answering my own question. It looks like we do not get stuck. But
we either get nothing on the session so the login timeout fires and we
drop the session. Or, we get a PDU and the login thread reads it in
before the normal rx thread does, but it assumes it is a login related
and we most likely drop the session due to invalid fields.

I think in iscsi_target_restore_sock_callbacks we want to do a:

cancel_delayed_work(&conn->login_work)

after we reset the callbacks and drop the sk_callback_lock lock.
Hou Pu April 26, 2020, 5:49 a.m. UTC | #3
On 2020/4/26 1:45 上午, Mike Christie wrote:
> On 4/24/20 12:01 PM, Mike Christie wrote:
>> On 4/24/20 7:35 AM, Hou Pu wrote:
>>> iscsi_target_sk_data_ready() could be invoked indirectly
>>> by iscsi_target_do_login_rx() from workqueue like following:
>>>
>>> iscsi_target_do_login_rx()
>>>    iscsi_target_do_login()
>>>      iscsi_target_do_tx_login_io()
>>>        iscsit_put_login_tx()
>>>          iscsi_login_tx_data()
>>>            tx_data()
>>>              sock_sendmsg_nosec()
>>>                tcp_sendmsg()
>>>                  release_sock()
>>>                    sk_backlog_rcv()
>>>                      tcp_v4_do_rcv()
>>>                        tcp_data_ready()
>>>                          iscsi_target_sk_data_ready()
>>>
>>> At that time LOGIN_FLAGS_READ_ACTIVE is not cleared.
>>> and iscsi_target_sk_data_ready will not read data
>>> from the socket. And some iscsi initiator(i.e. libiscsi)
>>> will wait forever for a reply.
>>>
>>> LOGIN_FLAGS_READ_ACTIVE should be cleared early just after
>>> doing the receive and before writing to the socket in
>>> iscsi_target_do_login_rx.
>>>
>>> But sad news is that LOGIN_FLAGS_READ_ACTIVE is also used
>>> by sk_state_change to do login cleanup if a socket was closed
>>> at login time. It is supposed to be cleared after the login
>>> pdu is successfully processed and replied.
>>>
>>> So introduce another flag LOGIN_FLAGS_WRITE_ACTIVE to cover
>>> the transmit part so that sk_state_change could work well
>>> and clear LOGIN_FLAGS_READ_ACTIVE early so that sk_data_ready
>>> could also work.
>>>
>>> While at here, use login_flags more efficient.
>>>
>>> Signed-off-by: Hou Pu <houpu@bytedance.com>
>>> ---
>>>   drivers/target/iscsi/iscsi_target_nego.c | 30 ++++++++++++++++++++++++++----
>>>   include/target/iscsi/iscsi_target_core.h |  9 +++++----
>>>   2 files changed, 31 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
>>> index 685d771b51d4..4cfa742e61df 100644
>>> --- a/drivers/target/iscsi/iscsi_target_nego.c
>>> +++ b/drivers/target/iscsi/iscsi_target_nego.c
>>> @@ -622,6 +622,26 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
>>>   	if (rc < 0)
>>>   		goto err;
>>>   
>>> +	/*
>>> +	 * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
>>> +	 * could be trigger again after this.
>>> +	 *
>>> +	 * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
>>> +	 * process a login pdu, so that sk_state_chage could do login
>>> +	 * cleanup as needed if the socket is closed. If a delayed work is
>>> +	 * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
>>> +	 * sk_state_change will leave the cleanup to the delayed work or
>>> +	 * it will schedule a delayed work to do cleanup.
>>> +	 */
>>> +	if (conn->sock) {
>>> +		struct sock *sk = conn->sock->sk;
>>> +
>>> +		write_lock_bh(&sk->sk_callback_lock);
>>> +		clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
>>> +		set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
>>> +		write_unlock_bh(&sk->sk_callback_lock);
>>> +	}
>>> +
>> Hey,
>>
>> I had one more question.
>>
>> With the above code, I think we have a race where if we clear the bit
>> above early and iscsi_target_sk_data_ready runs while
>> iscsi_target_do_login_rx is still running then we could queue the work
>> an extra time and get stuck. Because the bit is now not set, if
>> iscsi_target_sk_data_ready runs it will end up calling
>> schedule_delayed_work which will queue up the work again since the work
>> is running and not pending.

Yes. I was trying to allow queuing the delayed work early.

>>
>> If that is correct and we hit the race what happens if this was the last
>> login pdu, and we are supposed to go to full feature phase next? For
>> example if iscsi_target_do_login_rx runs an extra time, will we end up
>> stuck waiting in iscsi_target_do_login_rx's call to:
>>
>> rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
>>
>> ?

For the last login pdu, we may have race as you said. thanks for 
pointing it out.

I was trying to imagine a case where we can hit the race; normally it is
case a).

a). initiator send last login pdu -> target received -> target replied

b). initiator send last login pdu -> target received -> initiator send 
something -> target replied

In case b), we will queue another delayed work, which we should not.
After the target has replied to

the last login pdu, conn->conn_login is freed. We might visit it in
the delayed work.


> Just answering my own question. It looks like we do not get stuck. But
> we either get nothing on the session so the login timeout fires and we
> drop the session. Or, we get a PDU and the login thread reads it in
> before the normal rx thread does, but it assumes it is a login related
> and we most likely drop the session due to invalid fields.
>
> I think in iscsi_target_restore_sock_callbacks we want to do a:
>
> cancel_delayed_work(&conn->login_work)
>
> after we reset the callbacks and drop the sk_callback_lock lock.

I am not very sure if we could or if it is good to cancel_delayed_work 
from the work itself.

If it is ok then i am ok with it. Or in another way, I think we could 
just clear

LOGIN_FLAGS_READ_ACTIVE and set LOGIN_FLAGS_WRITE_ACTIVE

after iscsi_target_restore_sock_callbacks when finish process last login 
pdu.

What do you think?



Thanks,

Hou


>
Hou Pu April 26, 2020, 6:09 a.m. UTC | #4
>>>> +     */
>>>> +    if (conn->sock) {
>>>> +        struct sock *sk = conn->sock->sk;
>>>> +
>>>> +        write_lock_bh(&sk->sk_callback_lock);
>>>> +        clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
>>>> +        set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
>>>> +        write_unlock_bh(&sk->sk_callback_lock);
>>>> +    }
>>>> +
>>> Hey,
>>>
>>> I had one more question.
>>>
>>> With the above code, I think we have a race where if we clear the bit
>>> above early and iscsi_target_sk_data_ready runs while
>>> iscsi_target_do_login_rx is still running then we could queue the work
>>> an extra time and get stuck. Because the bit is now not set, if
>>> iscsi_target_sk_data_ready runs it will end up calling
>>> schedule_delayed_work which will queue up the work again since the work
>>> is running and not pending.
>
> Yes. I was trying to allow queuing the delayed work early.
>
>>>
>>> If that is correct and we hit the race what happens if this was the 
>>> last
>>> login pdu, and we are supposed to go to full feature phase next? For
>>> example if iscsi_target_do_login_rx runs an extra time, will we end up
>>> stuck waiting in iscsi_target_do_login_rx's call to:
>>>
>>> rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
>>>
>>> ?
>
> For the last login pdu, we may have race as you said. thanks for 
> pointing it out.
>
> I was trying to image a case where we can hit the race, normally it is 
> case a).
>
> a). initiator send last login pdu -> target received -> target replied
>
> b). initiator send last login pdu -> target received -> initiator send 
> something -> target replied
>
> in case b). we will queue another delayed work which we should not.  
> After the target replied
>
> the last login pdu, conn->conn_login is freed. we might visited it in 
> the delayed work.
>
>
>> Just answering my own question. It looks like we do not get stuck. But
>> we either get nothing on the session so the login timeout fires and we
>> drop the session. Or, we get a PDU and the login thread reads it in
>> before the normal rx thread does, but it assumes it is a login related
>> and we most likely drop the session due to invalid fields.
>>
>> I think in iscsi_target_restore_sock_callbacks we want to do a:
>>
>> cancel_delayed_work(&conn->login_work)
>>
>> after we reset the callbacks and drop the sk_callback_lock lock.
>
> I am not very sure if we could or if it is good to cancel_delayed_work 
> from the work itself.
>
> If it is ok then i am ok with it. Or in another way, I think we could 
> just clear
>
> LOGIN_FLAGS_READ_ACTIVE and set LOGIN_FLAGS_WRITE_ACTIVE
>
> after iscsi_target_restore_sock_callbacks when finish process last 
> login pdu.

That would look like (in iscsi_target_do_tx_login_io):

diff --git a/drivers/target/iscsi/iscsi_target_nego.c 
b/drivers/target/iscsi/iscsi_target_nego.c
index 685d771b51d4..4d0658731382 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -328,6 +328,28 @@ static int iscsi_target_do_tx_login_io(struct 
iscsi_conn *conn, struct iscsi_log
         u32 padding = 0;
         struct iscsi_login_rsp *login_rsp;

+       /*
+        * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
+        * could be trigger again after this.
+        *
+        * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
+        * process a login pdu, so that sk_state_chage could do login
+        * cleanup as needed if the socket is closed. If a delayed work is
+        * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
+        * sk_state_change will leave the cleanup to the delayed work or
+        * it will schedule a delayed work to do cleanup.
+        */
+       if (conn->sock) {
+               struct sock *sk = conn->sock->sk;
+
+               write_lock_bh(&sk->sk_callback_lock);
+               if (!test_bit(LOGIN_FLAGS_INITIAL_PDU, 
&conn->login_flags)) {
+                       clear_bit(LOGIN_FLAGS_READ_ACTIVE, 
&conn->login_flags);
+                       set_bit(LOGIN_FLAGS_WRITE_ACTIVE, 
&conn->login_flags);
+               }
+               write_unlock_bh(&sk->sk_callback_lock);
+       }

>
> What do you think?
>
>
>
> Thanks,
>
> Hou
>
>
>>
Mike Christie April 28, 2020, 5:50 p.m. UTC | #5
On 4/26/20 1:09 AM, Hou Pu wrote:
> 
>>>>> +     */
>>>>> +    if (conn->sock) {
>>>>> +        struct sock *sk = conn->sock->sk;
>>>>> +
>>>>> +        write_lock_bh(&sk->sk_callback_lock);
>>>>> +        clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
>>>>> +        set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
>>>>> +        write_unlock_bh(&sk->sk_callback_lock);
>>>>> +    }
>>>>> +
>>>> Hey,
>>>>
>>>> I had one more question.
>>>>
>>>> With the above code, I think we have a race where if we clear the bit
>>>> above early and iscsi_target_sk_data_ready runs while
>>>> iscsi_target_do_login_rx is still running then we could queue the work
>>>> an extra time and get stuck. Because the bit is now not set, if
>>>> iscsi_target_sk_data_ready runs it will end up calling
>>>> schedule_delayed_work which will queue up the work again since the work
>>>> is running and not pending.
>>
>> Yes. I was trying to allow queuing the delayed work early.
>>
>>>>
>>>> If that is correct and we hit the race what happens if this was the
>>>> last
>>>> login pdu, and we are supposed to go to full feature phase next? For
>>>> example if iscsi_target_do_login_rx runs an extra time, will we end up
>>>> stuck waiting in iscsi_target_do_login_rx's call to:
>>>>
>>>> rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
>>>>
>>>> ?
>>
>> For the last login pdu, we may have race as you said. thanks for
>> pointing it out.
>>
>> I was trying to image a case where we can hit the race, normally it is
>> case a).
>>
>> a). initiator send last login pdu -> target received -> target replied
>>
>> b). initiator send last login pdu -> target received -> initiator send
>> something -> target replied
>>
>> in case b). we will queue another delayed work which we should not. 
>> After the target replied
>>
>> the last login pdu, conn->conn_login is freed. we might visited it in
>> the delayed wo>>
>>
>>> Just answering my own question. It looks like we do not get stuck. But
>>> we either get nothing on the session so the login timeout fires and we
>>> drop the session. Or, we get a PDU and the login thread reads it in
>>> before the normal rx thread does, but it assumes it is a login related
>>> and we most likely drop the session due to invalid fields.
>>>
>>> I think in iscsi_target_restore_sock_callbacks we want to do a:
>>>
>>> cancel_delayed_work(&conn->login_work)
>>>
>>> after we reset the callbacks and drop the sk_callback_lock lock.
>>
>> I am not very sure if we could or if it is good to cancel_delayed_work
>> from the work itself.
>>
>> If it is ok then i am ok with it. Or in another way, I think we could
>> just clear
>>
>> LOGIN_FLAGS_READ_ACTIVE and set LOGIN_FLAGS_WRITE_ACTIVE
>>
>> after iscsi_target_restore_sock_callbacks when finish process last
>> login pdu.
> 
> That would look like (in iscsi_target_do_tx_login_io):
> 
> diff --git a/drivers/target/iscsi/iscsi_target_nego.c
> b/drivers/target/iscsi/iscsi_target_nego.c
> index 685d771b51d4..4d0658731382 100644
> --- a/drivers/target/iscsi/iscsi_target_nego.c
> +++ b/drivers/target/iscsi/iscsi_target_nego.c
> @@ -328,6 +328,28 @@ static int iscsi_target_do_tx_login_io(struct
> iscsi_conn *conn, struct iscsi_log
>         u32 padding = 0;
>         struct iscsi_login_rsp *login_rsp;
> 
> +       /*
> +        * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
> +        * could be trigger again after this.
> +        *
> +        * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
> +        * process a login pdu, so that sk_state_chage could do login
> +        * cleanup as needed if the socket is closed. If a delayed work is
> +        * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
> +        * sk_state_change will leave the cleanup to the delayed work or
> +        * it will schedule a delayed work to do cleanup.
> +        */
> +       if (conn->sock) {
> +               struct sock *sk = conn->sock->sk;
> +
> +               write_lock_bh(&sk->sk_callback_lock);
> +               if (!test_bit(LOGIN_FLAGS_INITIAL_PDU,
> &conn->login_flags)) {
> +                       clear_bit(LOGIN_FLAGS_READ_ACTIVE,
> &conn->login_flags);
> +                       set_bit(LOGIN_FLAGS_WRITE_ACTIVE,
> &conn->login_flags);
> +               }
> +               write_unlock_bh(&sk->sk_callback_lock);
> +       }

You lost me. I didn't understand this part. Would you still be doing the
above bit manipulation in iscsi_target_do_login_rx too?

Is the above code then to handle when
iscsi_target_start_negotiation->iscsi_target_do_login->iscsi_target_do_tx_login_io
runs?

I was thinking when you mentioned the final login PDU you were going to
do something when you detect login->login_complete is true.

This code is not my expertise area, so I might just not be understanding
something simple about how it all works.
Hou Pu April 29, 2020, 6:59 a.m. UTC | #6
On 2020/4/29 1:50 上午, Mike Christie wrote:
> On 4/26/20 1:09 AM, Hou Pu wrote:
>>
>>>>>> +     */
>>>>>> +    if (conn->sock) {
>>>>>> +        struct sock *sk = conn->sock->sk;
>>>>>> +
>>>>>> +        write_lock_bh(&sk->sk_callback_lock);
>>>>>> +        clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
>>>>>> +        set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
>>>>>> +        write_unlock_bh(&sk->sk_callback_lock);
>>>>>> +    }
>>>>>> +
>>>>> Hey,
>>>>>
>>>>> I had one more question.
>>>>>
>>>>> With the above code, I think we have a race where if we clear the bit
>>>>> above early and iscsi_target_sk_data_ready runs while
>>>>> iscsi_target_do_login_rx is still running then we could queue the work
>>>>> an extra time and get stuck. Because the bit is now not set, if
>>>>> iscsi_target_sk_data_ready runs it will end up calling
>>>>> schedule_delayed_work which will queue up the work again since the work
>>>>> is running and not pending.
>>>
>>> Yes. I was trying to allow queuing the delayed work early.
>>>
>>>>>
>>>>> If that is correct and we hit the race what happens if this was the
>>>>> last
>>>>> login pdu, and we are supposed to go to full feature phase next? For
>>>>> example if iscsi_target_do_login_rx runs an extra time, will we end up
>>>>> stuck waiting in iscsi_target_do_login_rx's call to:
>>>>>
>>>>> rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
>>>>>
>>>>> ?
>>>
>>> For the last login pdu, we may have race as you said. thanks for
>>> pointing it out.
>>>
>>> I was trying to image a case where we can hit the race, normally it is
>>> case a).
>>>
>>> a). initiator send last login pdu -> target received -> target replied
>>>
>>> b). initiator send last login pdu -> target received -> initiator send
>>> something -> target replied
>>>
>>> in case b). we will queue another delayed work which we should not.
>>> After the target replied
>>>
>>> the last login pdu, conn->conn_login is freed. we might visited it in
>>> the delayed wo>>
>>>
>>>> Just answering my own question. It looks like we do not get stuck. But
>>>> we either get nothing on the session so the login timeout fires and we
>>>> drop the session. Or, we get a PDU and the login thread reads it in
>>>> before the normal rx thread does, but it assumes it is a login related
>>>> and we most likely drop the session due to invalid fields.
>>>>
>>>> I think in iscsi_target_restore_sock_callbacks we want to do a:
>>>>
>>>> cancel_delayed_work(&conn->login_work)
>>>>
>>>> after we reset the callbacks and drop the sk_callback_lock lock.
>>>
>>> I am not very sure if we could or if it is good to cancel_delayed_work
>>> from the work itself.
>>>
>>> If it is ok then i am ok with it. Or in another way, I think we could
>>> just clear
>>>
>>> LOGIN_FLAGS_READ_ACTIVE and set LOGIN_FLAGS_WRITE_ACTIVE
>>>
>>> after iscsi_target_restore_sock_callbacks when finish process last
>>> login pdu.
>>
>> That would look like (in iscsi_target_do_tx_login_io):
>>
>> diff --git a/drivers/target/iscsi/iscsi_target_nego.c
>> b/drivers/target/iscsi/iscsi_target_nego.c
>> index 685d771b51d4..4d0658731382 100644
>> --- a/drivers/target/iscsi/iscsi_target_nego.c
>> +++ b/drivers/target/iscsi/iscsi_target_nego.c
>> @@ -328,6 +328,28 @@ static int iscsi_target_do_tx_login_io(struct
>> iscsi_conn *conn, struct iscsi_log
>>          u32 padding = 0;
>>          struct iscsi_login_rsp *login_rsp;
>>
>> +       /*
>> +        * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
>> +        * could be trigger again after this.
>> +        *
>> +        * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
>> +        * process a login pdu, so that sk_state_chage could do login
>> +        * cleanup as needed if the socket is closed. If a delayed work is
>> +        * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
>> +        * sk_state_change will leave the cleanup to the delayed work or
>> +        * it will schedule a delayed work to do cleanup.
>> +        */
>> +       if (conn->sock) {
>> +               struct sock *sk = conn->sock->sk;
>> +
>> +               write_lock_bh(&sk->sk_callback_lock);
>> +               if (!test_bit(LOGIN_FLAGS_INITIAL_PDU,
>> &conn->login_flags)) {
>> +                       clear_bit(LOGIN_FLAGS_READ_ACTIVE,
>> &conn->login_flags);
>> +                       set_bit(LOGIN_FLAGS_WRITE_ACTIVE,
>> &conn->login_flags);
>> +               }
>> +               write_unlock_bh(&sk->sk_callback_lock);
>> +       }
> 
> You lost me. I didn't understand this part. Would you still be doing the
> above bit manipulation in iscsi_target_do_login_rx too?

By moving this chunk here (from iscsi_target_do_login_rx to
iscsi_target_do_tx_login_io), we have already
restored the sock callbacks (like below in iscsi_target_do_login),
so clearing READ_ACTIVE would not cause another delayed
work to be scheduled.

So I think we would not have the race you pointed out
for last login pdu.

[code snippet

iscsi_target_do_login()
...
login->login_complete = 1;
iscsi_target_restore_sock_callbacks(conn);
if (iscsi_target_do_tx_login_io(conn,
                           login) < 0)

...
]



> 
> Is the above code then to handle when
> iscsi_target_start_negotiation->iscsi_target_do_login->iscsi_target_do_tx_login_io
> runs?
> 
> I was thinking when you mentioned the final login PDU you were going to
> do something when you detect login->login_complete is true.
> 

Even if we move the above chunk here, I realize that we
could still theoretically have a problem.

For login pdus which are neither the initial nor the last login pdu,
after enabling scheduling of the delayed work by clearing READ_ACTIVE,
it is possible to queue a delayed work (i.e. an out-of-order
initiator could send pdus at any time).
In case we receive such an out-of-order pdu, we need
to cancel the work when we have an error and later
do the cleanup.

I will try to solve this and resend these patches.
Maybe it's better to cancel_delayed_work() in
iscsi_target_do_login_rx.

I might not have expressed myself very clearly. Please
see the next version of these patches.


> This code is not my expertise area, so I might just not be understanding
> something simple about how it all works.

Your comments make sense and are helpful for me. Thanks.
> 

Thanks,
Hou
diff mbox series

Patch

diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 685d771b51d4..4cfa742e61df 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -622,6 +622,26 @@  static void iscsi_target_do_login_rx(struct work_struct *work)
 	if (rc < 0)
 		goto err;
 
+	/*
+	 * LOGIN_FLAGS_READ_ACTIVE is cleared so that sk_data_ready
+	 * could be triggered again after this.
+	 *
+	 * LOGIN_FLAGS_WRITE_ACTIVE is cleared after we successfully
+	 * process a login pdu, so that sk_state_change could do login
+	 * cleanup as needed if the socket is closed. If a delayed work is
+	 * ongoing (LOGIN_FLAGS_WRITE_ACTIVE or LOGIN_FLAGS_READ_ACTIVE),
+	 * sk_state_change will leave the cleanup to the delayed work or
+	 * it will schedule a delayed work to do cleanup.
+	 */
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+		clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
+		set_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+
 	pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n",
 			conn, current->comm, current->pid);
 
@@ -629,7 +649,8 @@  static void iscsi_target_do_login_rx(struct work_struct *work)
 	if (rc < 0) {
 		goto err;
 	} else if (!rc) {
-		if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE))
+		if (iscsi_target_sk_check_and_clear(conn,
+						    LOGIN_FLAGS_WRITE_ACTIVE))
 			goto err;
 	} else if (rc == 1) {
 		iscsi_target_nego_release(conn);
@@ -670,9 +691,10 @@  static void iscsi_target_sk_state_change(struct sock *sk)
 	state = __iscsi_target_sk_check_close(sk);
 	pr_debug("__iscsi_target_sk_close_change: state: %d\n", state);
 
-	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
-		pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change"
-			 " conn: %p\n", conn);
+	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags) ||
+	    test_bit(LOGIN_FLAGS_WRITE_ACTIVE, &conn->login_flags)) {
+		pr_debug("Got LOGIN_FLAGS_{READ|WRITE}_ACTIVE=1"
+			 " sk_state_change conn: %p\n", conn);
 		if (state)
 			set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
 		write_unlock_bh(&sk->sk_callback_lock);
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 591cd9e4692c..844bef255e89 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -566,10 +566,11 @@  struct iscsi_conn {
 	struct socket		*sock;
 	void			(*orig_data_ready)(struct sock *);
 	void			(*orig_state_change)(struct sock *);
-#define LOGIN_FLAGS_READ_ACTIVE		1
-#define LOGIN_FLAGS_CLOSED		2
-#define LOGIN_FLAGS_READY		4
-#define LOGIN_FLAGS_INITIAL_PDU		8
+#define LOGIN_FLAGS_READY		0
+#define LOGIN_FLAGS_INITIAL_PDU		1
+#define LOGIN_FLAGS_READ_ACTIVE		2
+#define LOGIN_FLAGS_WRITE_ACTIVE	3
+#define LOGIN_FLAGS_CLOSED		4
 	unsigned long		login_flags;
 	struct delayed_work	login_work;
 	struct iscsi_login	*login;