diff mbox series

[net,1/5] net/smc: fix dangling sock under state SMC_APPFINCLOSEWAIT

Message ID 1697009600-22367-2-git-send-email-alibuda@linux.alibaba.com (mailing list archive)
State Not Applicable
Headers show
Series net/smc: bugfixs for smc-r | expand

Commit Message

D. Wythe Oct. 11, 2023, 7:33 a.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

Considering scenario:

				smc_cdc_rx_handler_rwwi
__smc_release
				sock_set_flag
smc_close_active()
sock_set_flag

__set_bit(DEAD)			__set_bit(DONE)

Because __set_bit is not atomic, the DEAD or DONE flag might be lost.
If the DEAD flag is lost, the state SMC_CLOSED will never be reached
in smc_close_passive_work:

if (sock_flag(sk, SOCK_DEAD) &&
	smc_close_sent_any_close(conn)) {
	sk->sk_state = SMC_CLOSED;
} else {
	/* just shutdown, but not yet closed locally */
	sk->sk_state = SMC_APPFINCLOSEWAIT;
}

Replacing sock_set_flag() (i.e. __set_bit) with set_bit fixes this problem,
since set_bit is atomic.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/af_smc.c    | 4 ++--
 net/smc/smc.h       | 5 +++++
 net/smc/smc_cdc.c   | 2 +-
 net/smc/smc_close.c | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

Comments

Dust Li Oct. 11, 2023, 2 p.m. UTC | #1
On Wed, Oct 11, 2023 at 03:33:16PM +0800, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>Considering scenario:
>
>				smc_cdc_rx_handler_rwwi
>__smc_release
>				sock_set_flag
>smc_close_active()
>sock_set_flag
>
>__set_bit(DEAD)			__set_bit(DONE)

If I understand correctly, both operations should hold sock_lock,
which means they should not race. Have I missed something?

>
>Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>in smc_close_passive_work:
>
>if (sock_flag(sk, SOCK_DEAD) &&
>	smc_close_sent_any_close(conn)) {
>	sk->sk_state = SMC_CLOSED;
>} else {
>	/* just shutdown, but not yet closed locally */
>	sk->sk_state = SMC_APPFINCLOSEWAIT;
>}
>
>Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>Since set_bit is atomic.
>

You should add a fixes tag.
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> net/smc/af_smc.c    | 4 ++--
> net/smc/smc.h       | 5 +++++
> net/smc/smc_cdc.c   | 2 +-
> net/smc/smc_close.c | 2 +-
> 4 files changed, 9 insertions(+), 4 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index bacdd97..5ad2a9f 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
> 
> 	if (!smc->use_fallback) {
> 		rc = smc_close_active(smc);
>-		sock_set_flag(sk, SOCK_DEAD);
>+		smc_sock_set_flag(sk, SOCK_DEAD);
> 		sk->sk_shutdown |= SHUTDOWN_MASK;
> 	} else {
> 		if (sk->sk_state != SMC_CLOSED) {
>@@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
> 		if (new_clcsock)
> 			sock_release(new_clcsock);
> 		new_sk->sk_state = SMC_CLOSED;
>-		sock_set_flag(new_sk, SOCK_DEAD);
>+		smc_sock_set_flag(new_sk, SOCK_DEAD);
> 		sock_put(new_sk); /* final */
> 		*new_smc = NULL;
> 		goto out;
>diff --git a/net/smc/smc.h b/net/smc/smc.h
>index 24745fd..e377980 100644
>--- a/net/smc/smc.h
>+++ b/net/smc/smc.h
>@@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
> int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
> int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
> 
>+static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
>+{
>+	set_bit(flag, &sk->sk_flags);
>+}
>+
> #endif	/* __SMC_H */
>diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
>index 89105e9..01bdb79 100644
>--- a/net/smc/smc_cdc.c
>+++ b/net/smc/smc_cdc.c
>@@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
> 		smc->sk.sk_shutdown |= RCV_SHUTDOWN;
> 		if (smc->clcsock && smc->clcsock->sk)
> 			smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
>-		sock_set_flag(&smc->sk, SOCK_DONE);
>+		smc_sock_set_flag(&smc->sk, SOCK_DONE);
> 		sock_hold(&smc->sk); /* sock_put in close_work */
> 		if (!queue_work(smc_close_wq, &conn->close_work))
> 			sock_put(&smc->sk);
>diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
>index dbdf03e..449ef45 100644
>--- a/net/smc/smc_close.c
>+++ b/net/smc/smc_close.c
>@@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
> 		break;
> 	}
> 
>-	sock_set_flag(sk, SOCK_DEAD);
>+	smc_sock_set_flag(sk, SOCK_DEAD);
> 	sk->sk_state_change(sk);
> 
> 	if (release_clcsock) {
>-- 
>1.8.3.1
Wenjia Zhang Oct. 11, 2023, 8:31 p.m. UTC | #2
On 11.10.23 09:33, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> Considering scenario:
> 
> 				smc_cdc_rx_handler_rwwi
> __smc_release
> 				sock_set_flag
> smc_close_active()
> sock_set_flag
> 
> __set_bit(DEAD)			__set_bit(DONE)
> 
> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
> if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
> in smc_close_passive_work:
> 
> if (sock_flag(sk, SOCK_DEAD) &&
> 	smc_close_sent_any_close(conn)) {
> 	sk->sk_state = SMC_CLOSED;
> } else {
> 	/* just shutdown, but not yet closed locally */
> 	sk->sk_state = SMC_APPFINCLOSEWAIT;
> }
> 
> Replace sock_set_flags or __set_bit to set_bit will fix this problem.
> Since set_bit is atomic.
> 
I didn't really understand the scenario. What is 
smc_cdc_rx_handler_rwwi()? What does it do? Doesn't it take the lock 
while it runs?

> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>   net/smc/af_smc.c    | 4 ++--
>   net/smc/smc.h       | 5 +++++
>   net/smc/smc_cdc.c   | 2 +-
>   net/smc/smc_close.c | 2 +-
>   4 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index bacdd97..5ad2a9f 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
>   
>   	if (!smc->use_fallback) {
>   		rc = smc_close_active(smc);
> -		sock_set_flag(sk, SOCK_DEAD);
> +		smc_sock_set_flag(sk, SOCK_DEAD);
>   		sk->sk_shutdown |= SHUTDOWN_MASK;
>   	} else {
>   		if (sk->sk_state != SMC_CLOSED) {
> @@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
>   		if (new_clcsock)
>   			sock_release(new_clcsock);
>   		new_sk->sk_state = SMC_CLOSED;
> -		sock_set_flag(new_sk, SOCK_DEAD);
> +		smc_sock_set_flag(new_sk, SOCK_DEAD);
>   		sock_put(new_sk); /* final */
>   		*new_smc = NULL;
>   		goto out;
> diff --git a/net/smc/smc.h b/net/smc/smc.h
> index 24745fd..e377980 100644
> --- a/net/smc/smc.h
> +++ b/net/smc/smc.h
> @@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
>   int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
>   int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
>   
> +static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
> +{
> +	set_bit(flag, &sk->sk_flags);
> +}
> +
>   #endif	/* __SMC_H */
> diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
> index 89105e9..01bdb79 100644
> --- a/net/smc/smc_cdc.c
> +++ b/net/smc/smc_cdc.c
> @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
>   		smc->sk.sk_shutdown |= RCV_SHUTDOWN;
>   		if (smc->clcsock && smc->clcsock->sk)
>   			smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
> -		sock_set_flag(&smc->sk, SOCK_DONE);
> +		smc_sock_set_flag(&smc->sk, SOCK_DONE);
>   		sock_hold(&smc->sk); /* sock_put in close_work */
>   		if (!queue_work(smc_close_wq, &conn->close_work))
>   			sock_put(&smc->sk);
> diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
> index dbdf03e..449ef45 100644
> --- a/net/smc/smc_close.c
> +++ b/net/smc/smc_close.c
> @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
>   		break;
>   	}
>   
> -	sock_set_flag(sk, SOCK_DEAD);
> +	smc_sock_set_flag(sk, SOCK_DEAD);
>   	sk->sk_state_change(sk);
>   
>   	if (release_clcsock) {
D. Wythe Oct. 12, 2023, 2:47 a.m. UTC | #3
On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>
>
> On 11.10.23 09:33, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> Considering scenario:
>>
>>                 smc_cdc_rx_handler_rwwi
>> __smc_release
>>                 sock_set_flag
>> smc_close_active()
>> sock_set_flag
>>
>> __set_bit(DEAD)            __set_bit(DONE)
>>
>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>> if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>> in smc_close_passive_work:
>>
>> if (sock_flag(sk, SOCK_DEAD) &&
>>     smc_close_sent_any_close(conn)) {
>>     sk->sk_state = SMC_CLOSED;
>> } else {
>>     /* just shutdown, but not yet closed locally */
>>     sk->sk_state = SMC_APPFINCLOSEWAIT;
>> }
>>
>> Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>> Since set_bit is atomic.
>>
> I didn't really understand the scenario. What is 
> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock 
> during the runtime?
>

Hi Wenjia,

Sorry about that. It is not smc_cdc_rx_handler_rwwi() but 
smc_cdc_rx_handler().

Following is a more specific description of the issues


    SOFTIRQ CONTEXT                     PROCESS CONTEXT
------------------------------------------------------------
                                        lock_sock()
                                        __smc_release

smc_cdc_rx_handler()
smc_cdc_msg_recv()
bh_lock_sock()
smc_cdc_msg_recv_action()
sock_set_flag(DONE)                     sock_set_flag(DEAD)
__set_bit                               __set_bit
bh_unlock_sock()
                                        release_sock()


Note :  bh_lock_sock and lock_sock are not mutually exclusive.
They are actually used for different purposes and contexts.


>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/smc/af_smc.c    | 4 ++--
>>   net/smc/smc.h       | 5 +++++
>>   net/smc/smc_cdc.c   | 2 +-
>>   net/smc/smc_close.c | 2 +-
>>   4 files changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index bacdd97..5ad2a9f 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
>>         if (!smc->use_fallback) {
>>           rc = smc_close_active(smc);
>> -        sock_set_flag(sk, SOCK_DEAD);
>> +        smc_sock_set_flag(sk, SOCK_DEAD);
>>           sk->sk_shutdown |= SHUTDOWN_MASK;
>>       } else {
>>           if (sk->sk_state != SMC_CLOSED) {
>> @@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct smc_sock 
>> *lsmc, struct smc_sock **new_smc)
>>           if (new_clcsock)
>>               sock_release(new_clcsock);
>>           new_sk->sk_state = SMC_CLOSED;
>> -        sock_set_flag(new_sk, SOCK_DEAD);
>> +        smc_sock_set_flag(new_sk, SOCK_DEAD);
>>           sock_put(new_sk); /* final */
>>           *new_smc = NULL;
>>           goto out;
>> diff --git a/net/smc/smc.h b/net/smc/smc.h
>> index 24745fd..e377980 100644
>> --- a/net/smc/smc.h
>> +++ b/net/smc/smc.h
>> @@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
>>   int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct 
>> genl_info *info);
>>   int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct 
>> genl_info *info);
>>   +static inline void smc_sock_set_flag(struct sock *sk, enum 
>> sock_flags flag)
>> +{
>> +    set_bit(flag, &sk->sk_flags);
>> +}
>> +
>>   #endif    /* __SMC_H */
>> diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
>> index 89105e9..01bdb79 100644
>> --- a/net/smc/smc_cdc.c
>> +++ b/net/smc/smc_cdc.c
>> @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct 
>> smc_sock *smc,
>>           smc->sk.sk_shutdown |= RCV_SHUTDOWN;
>>           if (smc->clcsock && smc->clcsock->sk)
>>               smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
>> -        sock_set_flag(&smc->sk, SOCK_DONE);
>> +        smc_sock_set_flag(&smc->sk, SOCK_DONE);
>>           sock_hold(&smc->sk); /* sock_put in close_work */
>>           if (!queue_work(smc_close_wq, &conn->close_work))
>>               sock_put(&smc->sk);
>> diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
>> index dbdf03e..449ef45 100644
>> --- a/net/smc/smc_close.c
>> +++ b/net/smc/smc_close.c
>> @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
>>           break;
>>       }
>>   -    sock_set_flag(sk, SOCK_DEAD);
>> +    smc_sock_set_flag(sk, SOCK_DEAD);
>>       sk->sk_state_change(sk);
>>         if (release_clcsock) {
Wenjia Zhang Oct. 12, 2023, 11:51 a.m. UTC | #4
On 12.10.23 04:37, D. Wythe wrote:
> 
> 
> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>>
>>
>> On 11.10.23 09:33, D. Wythe wrote:
>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>
>>> Considering scenario:
>>>
>>>                 smc_cdc_rx_handler_rwwi
>>> __smc_release
>>>                 sock_set_flag
>>> smc_close_active()
>>> sock_set_flag
>>>
>>> __set_bit(DEAD)            __set_bit(DONE)
>>>
>>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>>> if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>>> in smc_close_passive_work:
>>>
>>> if (sock_flag(sk, SOCK_DEAD) &&
>>>     smc_close_sent_any_close(conn)) {
>>>     sk->sk_state = SMC_CLOSED;
>>> } else {
>>>     /* just shutdown, but not yet closed locally */
>>>     sk->sk_state = SMC_APPFINCLOSEWAIT;
>>> }
>>>
>>> Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>>> Since set_bit is atomic.
>>>
>> I didn't really understand the scenario. What is 
>> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock 
>> during the runtime?
>>
> 
> Hi Wenjia,
> 
> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but 
> smc_cdc_rx_handler();
> 
> Following is a more specific description of the issues
> 
> 
> lock_sock()
> __smc_release
> 
> smc_cdc_rx_handler()
> smc_cdc_msg_recv()
> bh_lock_sock()
> smc_cdc_msg_recv_action()
> sock_set_flag(DONE) sock_set_flag(DEAD)
> __set_bit __set_bit
> bh_unlock_sock()
> release_sock()
> 
> 
> 
> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are 
> actually used for different purposes and contexts.
> 
> 
OK, it's true that bh_lock_sock() and lock_sock() are not really 
mutually exclusive. However, since bh_lock_sock() is used, the scenario 
you described above should not happen, because it takes 
sk_lock.slock. Given that, IMO, only the following 
situation can happen.

lock_sock()
__smc_release

smc_cdc_rx_handler()
smc_cdc_msg_recv()
bh_lock_sock()
smc_cdc_msg_recv_action()
sock_set_flag(DONE)
bh_unlock_sock()
sock_set_flag(DEAD)
release_sock()

> 
>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>> ---
>>>   net/smc/af_smc.c    | 4 ++--
>>>   net/smc/smc.h       | 5 +++++
>>>   net/smc/smc_cdc.c   | 2 +-
>>>   net/smc/smc_close.c | 2 +-
>>>   4 files changed, 9 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>> index bacdd97..5ad2a9f 100644
>>> --- a/net/smc/af_smc.c
>>> +++ b/net/smc/af_smc.c
>>> @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
>>>         if (!smc->use_fallback) {
>>>           rc = smc_close_active(smc);
>>> -        sock_set_flag(sk, SOCK_DEAD);
>>> +        smc_sock_set_flag(sk, SOCK_DEAD);
>>>           sk->sk_shutdown |= SHUTDOWN_MASK;
>>>       } else {
>>>           if (sk->sk_state != SMC_CLOSED) {
>>> @@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct smc_sock 
>>> *lsmc, struct smc_sock **new_smc)
>>>           if (new_clcsock)
>>>               sock_release(new_clcsock);
>>>           new_sk->sk_state = SMC_CLOSED;
>>> -        sock_set_flag(new_sk, SOCK_DEAD);
>>> +        smc_sock_set_flag(new_sk, SOCK_DEAD);
>>>           sock_put(new_sk); /* final */
>>>           *new_smc = NULL;
>>>           goto out;
>>> diff --git a/net/smc/smc.h b/net/smc/smc.h
>>> index 24745fd..e377980 100644
>>> --- a/net/smc/smc.h
>>> +++ b/net/smc/smc.h
>>> @@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
>>>   int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct 
>>> genl_info *info);
>>>   int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct 
>>> genl_info *info);
>>>   +static inline void smc_sock_set_flag(struct sock *sk, enum 
>>> sock_flags flag)
>>> +{
>>> +    set_bit(flag, &sk->sk_flags);
>>> +}
>>> +
>>>   #endif    /* __SMC_H */
>>> diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
>>> index 89105e9..01bdb79 100644
>>> --- a/net/smc/smc_cdc.c
>>> +++ b/net/smc/smc_cdc.c
>>> @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct 
>>> smc_sock *smc,
>>>           smc->sk.sk_shutdown |= RCV_SHUTDOWN;
>>>           if (smc->clcsock && smc->clcsock->sk)
>>>               smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
>>> -        sock_set_flag(&smc->sk, SOCK_DONE);
>>> +        smc_sock_set_flag(&smc->sk, SOCK_DONE);
>>>           sock_hold(&smc->sk); /* sock_put in close_work */
>>>           if (!queue_work(smc_close_wq, &conn->close_work))
>>>               sock_put(&smc->sk);
>>> diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
>>> index dbdf03e..449ef45 100644
>>> --- a/net/smc/smc_close.c
>>> +++ b/net/smc/smc_close.c
>>> @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
>>>           break;
>>>       }
>>>   -    sock_set_flag(sk, SOCK_DEAD);
>>> +    smc_sock_set_flag(sk, SOCK_DEAD);
>>>       sk->sk_state_change(sk);
>>>         if (release_clcsock) {
>
Dust Li Oct. 13, 2023, 5:32 a.m. UTC | #5
On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>
>
>On 12.10.23 04:37, D. Wythe wrote:
>> 
>> 
>> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>> > 
>> > 
>> > On 11.10.23 09:33, D. Wythe wrote:
>> > > From: "D. Wythe" <alibuda@linux.alibaba.com>
>> > > 
>> > > Considering scenario:
>> > > 
>> > >                 smc_cdc_rx_handler_rwwi
>> > > __smc_release
>> > >                 sock_set_flag
>> > > smc_close_active()
>> > > sock_set_flag
>> > > 
>> > > __set_bit(DEAD)            __set_bit(DONE)
>> > > 
>> > > Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>> > > if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>> > > in smc_close_passive_work:
>> > > 
>> > > if (sock_flag(sk, SOCK_DEAD) &&
>> > >     smc_close_sent_any_close(conn)) {
>> > >     sk->sk_state = SMC_CLOSED;
>> > > } else {
>> > >     /* just shutdown, but not yet closed locally */
>> > >     sk->sk_state = SMC_APPFINCLOSEWAIT;
>> > > }
>> > > 
>> > > Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>> > > Since set_bit is atomic.
>> > > 
>> > I didn't really understand the scenario. What is
>> > smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>> > during the runtime?
>> > 
>> 
>> Hi Wenjia,
>> 
>> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>> smc_cdc_rx_handler();
>> 
>> Following is a more specific description of the issues
>> 
>> 
>> lock_sock()
>> __smc_release
>> 
>> smc_cdc_rx_handler()
>> smc_cdc_msg_recv()
>> bh_lock_sock()
>> smc_cdc_msg_recv_action()
>> sock_set_flag(DONE) sock_set_flag(DEAD)
>> __set_bit __set_bit
>> bh_unlock_sock()
>> release_sock()
>> 
>> 
>> 
>> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are
>> actually used for different purposes and contexts.
>> 
>> 
>ok, that's true that |bh_lock_sock|and |lock_sock|are not really mutually
>exclusive. However, since bh_lock_sock() is used, this scenario you described
>above should not happen, because that gets the sk_lock.slock. Following this
>scenarios, IMO, only the following situation can happen.
>
>lock_sock()
>__smc_release
>
>smc_cdc_rx_handler()
>smc_cdc_msg_recv()
>bh_lock_sock()
>smc_cdc_msg_recv_action()
>sock_set_flag(DONE)
>bh_unlock_sock()
>sock_set_flag(DEAD)
>release_sock()

Hi wenjia,

I think I know what D. Wythe means now, and I think he is right on this.

IIUC, in process context, lock_sock() won't respect bh_lock_sock() if it
acquires the lock before bh_lock_sock(). This is how the sock lock works.

    PROCESS CONTEXT                                 INTERRUPT CONTEXT
------------------------------------------------------------------------
lock_sock()
    spin_lock_bh(&sk->sk_lock.slock);
    ...
    sk->sk_lock.owned = 1;
    // here the spinlock is released
    spin_unlock_bh(&sk->sk_lock.slock);
__smc_release()
                                                   bh_lock_sock(&smc->sk);
                                                   smc_cdc_msg_recv_action(smc, cdc);
                                                       sock_set_flag(&smc->sk, SOCK_DONE);
                                                   bh_unlock_sock(&smc->sk);

    sock_set_flag(DEAD)  <-- Can be before or after sock_set_flag(DONE)
release_sock()

The bh_lock_sock() only spins on sk->sk_lock.slock, which is already released
after lock_sock() returns. Therefore, there is actually no mutual exclusion between
the code running after lock_sock() and before release_sock() and the code in bh_lock_sock()...bh_unlock_sock().
Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and might happen
before or after sock_set_flag(DONE).


Actually, in TCP, the interrupt context will check sock_owned_by_user().
If it returns true, the softirq just defers the processing to the backlog, which is
then processed in release_sock(). This avoids the race between softirq and process
context when accessing the 'struct sock'.

tcp_v4_rcv()
         bh_lock_sock_nested(sk);
         tcp_segs_in(tcp_sk(sk), skb);
         ret = 0;
         if (!sock_owned_by_user(sk)) {
                 ret = tcp_v4_do_rcv(sk, skb);
         } else {
                 if (tcp_add_backlog(sk, skb, &drop_reason))
                         goto discard_and_relse;
         }
         bh_unlock_sock(sk);


But in SMC we don't have a backlog, which means that fields in 'struct sock'
may all be subject to races, and this sock_set_flag() is just one of the cases.

Best regards,
Dust



>
>> 
>> > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> > > ---
>> > >   net/smc/af_smc.c    | 4 ++--
>> > >   net/smc/smc.h       | 5 +++++
>> > >   net/smc/smc_cdc.c   | 2 +-
>> > >   net/smc/smc_close.c | 2 +-
>> > >   4 files changed, 9 insertions(+), 4 deletions(-)
>> > > 
>> > > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> > > index bacdd97..5ad2a9f 100644
>> > > --- a/net/smc/af_smc.c
>> > > +++ b/net/smc/af_smc.c
>> > > @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
>> > >         if (!smc->use_fallback) {
>> > >           rc = smc_close_active(smc);
>> > > -        sock_set_flag(sk, SOCK_DEAD);
>> > > +        smc_sock_set_flag(sk, SOCK_DEAD);
>> > >           sk->sk_shutdown |= SHUTDOWN_MASK;
>> > >       } else {
>> > >           if (sk->sk_state != SMC_CLOSED) {
>> > > @@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct
>> > > smc_sock *lsmc, struct smc_sock **new_smc)
>> > >           if (new_clcsock)
>> > >               sock_release(new_clcsock);
>> > >           new_sk->sk_state = SMC_CLOSED;
>> > > -        sock_set_flag(new_sk, SOCK_DEAD);
>> > > +        smc_sock_set_flag(new_sk, SOCK_DEAD);
>> > >           sock_put(new_sk); /* final */
>> > >           *new_smc = NULL;
>> > >           goto out;
>> > > diff --git a/net/smc/smc.h b/net/smc/smc.h
>> > > index 24745fd..e377980 100644
>> > > --- a/net/smc/smc.h
>> > > +++ b/net/smc/smc.h
>> > > @@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
>> > >   int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct
>> > > genl_info *info);
>> > >   int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct
>> > > genl_info *info);
>> > >   +static inline void smc_sock_set_flag(struct sock *sk, enum
>> > > sock_flags flag)
>> > > +{
>> > > +    set_bit(flag, &sk->sk_flags);
>> > > +}
>> > > +
>> > >   #endif    /* __SMC_H */
>> > > diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
>> > > index 89105e9..01bdb79 100644
>> > > --- a/net/smc/smc_cdc.c
>> > > +++ b/net/smc/smc_cdc.c
>> > > @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct
>> > > smc_sock *smc,
>> > >           smc->sk.sk_shutdown |= RCV_SHUTDOWN;
>> > >           if (smc->clcsock && smc->clcsock->sk)
>> > >               smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
>> > > -        sock_set_flag(&smc->sk, SOCK_DONE);
>> > > +        smc_sock_set_flag(&smc->sk, SOCK_DONE);
>> > >           sock_hold(&smc->sk); /* sock_put in close_work */
>> > >           if (!queue_work(smc_close_wq, &conn->close_work))
>> > >               sock_put(&smc->sk);
>> > > diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
>> > > index dbdf03e..449ef45 100644
>> > > --- a/net/smc/smc_close.c
>> > > +++ b/net/smc/smc_close.c
>> > > @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
>> > >           break;
>> > >       }
>> > >   -    sock_set_flag(sk, SOCK_DEAD);
>> > > +    smc_sock_set_flag(sk, SOCK_DEAD);
>> > >       sk->sk_state_change(sk);
>> > >         if (release_clcsock) {
>>
Wenjia Zhang Oct. 13, 2023, 11:52 a.m. UTC | #6
On 13.10.23 07:32, Dust Li wrote:
> On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>>
>>
>> On 12.10.23 04:37, D. Wythe wrote:
>>>
>>>
>>> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>>>>
>>>>
>>>> On 11.10.23 09:33, D. Wythe wrote:
>>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>>
>>>>> Considering scenario:
>>>>>
>>>>>                  smc_cdc_rx_handler_rwwi
>>>>> __smc_release
>>>>>                  sock_set_flag
>>>>> smc_close_active()
>>>>> sock_set_flag
>>>>>
>>>>> __set_bit(DEAD)            __set_bit(DONE)
>>>>>
>>>>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>>>>> if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>>>>> in smc_close_passive_work:
>>>>>
>>>>> if (sock_flag(sk, SOCK_DEAD) &&
>>>>>      smc_close_sent_any_close(conn)) {
>>>>>      sk->sk_state = SMC_CLOSED;
>>>>> } else {
>>>>>      /* just shutdown, but not yet closed locally */
>>>>>      sk->sk_state = SMC_APPFINCLOSEWAIT;
>>>>> }
>>>>>
>>>>> Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>>>>> Since set_bit is atomic.
>>>>>
>>>> I didn't really understand the scenario. What is
>>>> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>>>> during the runtime?
>>>>
>>>
>>> Hi Wenjia,
>>>
>>> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>>> smc_cdc_rx_handler();
>>>
>>> Following is a more specific description of the issues
>>>
>>>
>>> lock_sock()
>>> __smc_release
>>>
>>> smc_cdc_rx_handler()
>>> smc_cdc_msg_recv()
>>> bh_lock_sock()
>>> smc_cdc_msg_recv_action()
>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>> __set_bit __set_bit
>>> bh_unlock_sock()
>>> release_sock()
>>>
>>>
>>>
>>> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are
>>> actually used for different purposes and contexts.
>>>
>>>
>> ok, that's true that |bh_lock_sock|and |lock_sock|are not really mutually
>> exclusive. However, since bh_lock_sock() is used, this scenario you described
>> above should not happen, because that gets the sk_lock.slock. Following this
>> scenarios, IMO, only the following situation can happen.
>>
>> lock_sock()
>> __smc_release
>>
>> smc_cdc_rx_handler()
>> smc_cdc_msg_recv()
>> bh_lock_sock()
>> smc_cdc_msg_recv_action()
>> sock_set_flag(DONE)
>> bh_unlock_sock()
>> sock_set_flag(DEAD)
>> release_sock()
> 
> Hi wenjia,
> 
> I think I know what D. Wythe means now, and I think he is right on this.
> 
> IIUC, in process context, lock_sock() won't respect bh_lock_sock() if it
> acquires the lock before bh_lock_sock(). This is how the sock lock works.
> 
>      PROCESS CONTEXT                                 INTERRUPT CONTEXT
> ------------------------------------------------------------------------
> lock_sock()
>      spin_lock_bh(&sk->sk_lock.slock);
>      ...
>      sk->sk_lock.owned = 1;
>      // here the spinlock is released
>      spin_unlock_bh(&sk->sk_lock.slock);
> __smc_release()
>                                                     bh_lock_sock(&smc->sk);
>                                                     smc_cdc_msg_recv_action(smc, cdc);
>                                                         sock_set_flag(&smc->sk, SOCK_DONE);
>                                                     bh_unlock_sock(&smc->sk);
> 
>      sock_set_flag(DEAD)  <-- Can be before or after sock_set_flag(DONE)
> release_sock()
> 
> The bh_lock_sock() only spins on sk->sk_lock.slock, which is already released
> after lock_sock() return. Therefor, there is actually no lock between
> the code after lock_sock() and before release_sock() with bh_lock_sock()...bh_unlock_sock().
> Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and might be
> before or after sock_set_flag(DONE).
> 
> 
> Actually, in TCP, the interrupt context will check sock_owned_by_user().
> If it returns true, the softirq just defer the process to backlog, and process
> that in release_sock(). Which avoid the race between softirq and process
> when visiting the 'struct sock'.
> 
> tcp_v4_rcv()
>           bh_lock_sock_nested(sk);
>           tcp_segs_in(tcp_sk(sk), skb);
>           ret = 0;
>           if (!sock_owned_by_user(sk)) {
>                   ret = tcp_v4_do_rcv(sk, skb);
>           } else {
>                   if (tcp_add_backlog(sk, skb, &drop_reason))
>                           goto discard_and_relse;
>           }
>           bh_unlock_sock(sk);
> 
> 
> But in SMC we don't have a backlog, that means fields in 'struct sock'
> might all have race, and this sock_set_flag() is just one of the cases.
> 
> Best regards,
> Dust
> 
I agree on your description above.
Sure, the following case 1) can also happen

case 1)
-------
  lock_sock()
  __smc_release

  sock_set_flag(DEAD)
  bh_lock_sock()
  smc_cdc_msg_recv_action()
  sock_set_flag(DONE)
  bh_unlock_sock()
  release_sock()

case 2)
-------
  lock_sock()
  __smc_release

  bh_lock_sock()
  smc_cdc_msg_recv_action()
  sock_set_flag(DONE) sock_set_flag(DEAD)
  __set_bit __set_bit
  bh_unlock_sock()
  release_sock()

My point here is that case 2) can never happen, i.e. that 
sock_set_flag(DONE) and sock_set_flag(DEAD) cannot happen concurrently. 
Thus, how could
the atomic set help make sure that the DEAD flag would not be 
overwritten with DONE?

Maybe I'm the only one who is getting stuck on this problem. I'd 
appreciate it if you could help me get out :P

Thanks,
Wenjia
> 
> 
>>
>>>
>>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>>> ---
>>>>>    net/smc/af_smc.c    | 4 ++--
>>>>>    net/smc/smc.h       | 5 +++++
>>>>>    net/smc/smc_cdc.c   | 2 +-
>>>>>    net/smc/smc_close.c | 2 +-
>>>>>    4 files changed, 9 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>>>> index bacdd97..5ad2a9f 100644
>>>>> --- a/net/smc/af_smc.c
>>>>> +++ b/net/smc/af_smc.c
>>>>> @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
>>>>>          if (!smc->use_fallback) {
>>>>>            rc = smc_close_active(smc);
>>>>> -        sock_set_flag(sk, SOCK_DEAD);
>>>>> +        smc_sock_set_flag(sk, SOCK_DEAD);
>>>>>            sk->sk_shutdown |= SHUTDOWN_MASK;
>>>>>        } else {
>>>>>            if (sk->sk_state != SMC_CLOSED) {
>>>>> @@ -1742,7 +1742,7 @@ static int smc_clcsock_accept(struct
>>>>> smc_sock *lsmc, struct smc_sock **new_smc)
>>>>>            if (new_clcsock)
>>>>>                sock_release(new_clcsock);
>>>>>            new_sk->sk_state = SMC_CLOSED;
>>>>> -        sock_set_flag(new_sk, SOCK_DEAD);
>>>>> +        smc_sock_set_flag(new_sk, SOCK_DEAD);
>>>>>            sock_put(new_sk); /* final */
>>>>>            *new_smc = NULL;
>>>>>            goto out;
>>>>> diff --git a/net/smc/smc.h b/net/smc/smc.h
>>>>> index 24745fd..e377980 100644
>>>>> --- a/net/smc/smc.h
>>>>> +++ b/net/smc/smc.h
>>>>> @@ -377,4 +377,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
>>>>>    int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct
>>>>> genl_info *info);
>>>>>    int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct
>>>>> genl_info *info);
>>>>>    +static inline void smc_sock_set_flag(struct sock *sk, enum
>>>>> sock_flags flag)
>>>>> +{
>>>>> +    set_bit(flag, &sk->sk_flags);
>>>>> +}
>>>>> +
>>>>>    #endif    /* __SMC_H */
>>>>> diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
>>>>> index 89105e9..01bdb79 100644
>>>>> --- a/net/smc/smc_cdc.c
>>>>> +++ b/net/smc/smc_cdc.c
>>>>> @@ -385,7 +385,7 @@ static void smc_cdc_msg_recv_action(struct
>>>>> smc_sock *smc,
>>>>>            smc->sk.sk_shutdown |= RCV_SHUTDOWN;
>>>>>            if (smc->clcsock && smc->clcsock->sk)
>>>>>                smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
>>>>> -        sock_set_flag(&smc->sk, SOCK_DONE);
>>>>> +        smc_sock_set_flag(&smc->sk, SOCK_DONE);
>>>>>            sock_hold(&smc->sk); /* sock_put in close_work */
>>>>>            if (!queue_work(smc_close_wq, &conn->close_work))
>>>>>                sock_put(&smc->sk);
>>>>> diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
>>>>> index dbdf03e..449ef45 100644
>>>>> --- a/net/smc/smc_close.c
>>>>> +++ b/net/smc/smc_close.c
>>>>> @@ -173,7 +173,7 @@ void smc_close_active_abort(struct smc_sock *smc)
>>>>>            break;
>>>>>        }
>>>>>    -    sock_set_flag(sk, SOCK_DEAD);
>>>>> +    smc_sock_set_flag(sk, SOCK_DEAD);
>>>>>        sk->sk_state_change(sk);
>>>>>          if (release_clcsock) {
>>>
Dust Li Oct. 13, 2023, 12:27 p.m. UTC | #7
On Fri, Oct 13, 2023 at 01:52:09PM +0200, Wenjia Zhang wrote:
>
>
>On 13.10.23 07:32, Dust Li wrote:
>> On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>> > 
>> > 
>> > On 12.10.23 04:37, D. Wythe wrote:
>> > > 
>> > > 
>> > > On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>> > > > 
>> > > > 
>> > > > On 11.10.23 09:33, D. Wythe wrote:
>> > > > > From: "D. Wythe" <alibuda@linux.alibaba.com>
>> > > > > 
>> > > > > Considering scenario:
>> > > > > 
>> > > > >                  smc_cdc_rx_handler_rwwi
>> > > > > __smc_release
>> > > > >                  sock_set_flag
>> > > > > smc_close_active()
>> > > > > sock_set_flag
>> > > > > 
>> > > > > __set_bit(DEAD)            __set_bit(DONE)
>> > > > > 
>> > > > > Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>> > > > > if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>> > > > > in smc_close_passive_work:
>> > > > > 
>> > > > > if (sock_flag(sk, SOCK_DEAD) &&
>> > > > >      smc_close_sent_any_close(conn)) {
>> > > > >      sk->sk_state = SMC_CLOSED;
>> > > > > } else {
>> > > > >      /* just shutdown, but not yet closed locally */
>> > > > >      sk->sk_state = SMC_APPFINCLOSEWAIT;
>> > > > > }
>> > > > > 
>> > > > > Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>> > > > > Since set_bit is atomic.
>> > > > > 
>> > > > I didn't really understand the scenario. What is
>> > > > smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>> > > > during the runtime?
>> > > > 
>> > > 
>> > > Hi Wenjia,
>> > > 
>> > > Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>> > > smc_cdc_rx_handler();
>> > > 
>> > > Following is a more specific description of the issues
>> > > 
>> > > 
>> > > lock_sock()
>> > > __smc_release
>> > > 
>> > > smc_cdc_rx_handler()
>> > > smc_cdc_msg_recv()
>> > > bh_lock_sock()
>> > > smc_cdc_msg_recv_action()
>> > > sock_set_flag(DONE) sock_set_flag(DEAD)
>> > > __set_bit __set_bit
>> > > bh_unlock_sock()
>> > > release_sock()
>> > > 
>> > > 
>> > > 
>> > > Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are
>> > > actually used for different purposes and contexts.
>> > > 
>> > > 
>> > ok, that's true that |bh_lock_sock|and |lock_sock|are not really mutually
>> > exclusive. However, since bh_lock_sock() is used, this scenario you described
>> > above should not happen, because that gets the sk_lock.slock. Following this
>> > scenarios, IMO, only the following situation can happen.
>> > 
>> > lock_sock()
>> > __smc_release
>> > 
>> > smc_cdc_rx_handler()
>> > smc_cdc_msg_recv()
>> > bh_lock_sock()
>> > smc_cdc_msg_recv_action()
>> > sock_set_flag(DONE)
>> > bh_unlock_sock()
>> > sock_set_flag(DEAD)
>> > release_sock()
>> 
>> Hi wenjia,
>> 
>> I think I know what D. Wythe means now, and I think he is right on this.
>> 
>> IIUC, in process context, lock_sock() won't respect bh_lock_sock() if it
>> acquires the lock before bh_lock_sock(). This is how the sock lock works.
>> 
>>      PROCESS CONTEXT                                 INTERRUPT CONTEXT
>> ------------------------------------------------------------------------
>> lock_sock()
>>      spin_lock_bh(&sk->sk_lock.slock);
>>      ...
>>      sk->sk_lock.owned = 1;
>>      // here the spinlock is released
>>      spin_unlock_bh(&sk->sk_lock.slock);
>> __smc_release()
>>                                                     bh_lock_sock(&smc->sk);
>>                                                     smc_cdc_msg_recv_action(smc, cdc);
>>                                                         sock_set_flag(&smc->sk, SOCK_DONE);
>>                                                     bh_unlock_sock(&smc->sk);
>> 
>>      sock_set_flag(DEAD)  <-- Can be before or after sock_set_flag(DONE)
>> release_sock()
>> 
>> The bh_lock_sock() only spins on sk->sk_lock.slock, which is already released
>> after lock_sock() return. Therefor, there is actually no lock between
>> the code after lock_sock() and before release_sock() with bh_lock_sock()...bh_unlock_sock().
>> Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and might be
>> before or after sock_set_flag(DONE).
>> 
>> 
>> Actually, in TCP, the interrupt context will check sock_owned_by_user().
>> If it returns true, the softirq just defer the process to backlog, and process
>> that in release_sock(). Which avoid the race between softirq and process
>> when visiting the 'struct sock'.
>> 
>> tcp_v4_rcv()
>>           bh_lock_sock_nested(sk);
>>           tcp_segs_in(tcp_sk(sk), skb);
>>           ret = 0;
>>           if (!sock_owned_by_user(sk)) {
>>                   ret = tcp_v4_do_rcv(sk, skb);
>>           } else {
>>                   if (tcp_add_backlog(sk, skb, &drop_reason))
>>                           goto discard_and_relse;
>>           }
>>           bh_unlock_sock(sk);
>> 
>> 
>> But in SMC we don't have a backlog, that means fields in 'struct sock'
>> might all have race, and this sock_set_flag() is just one of the cases.
>> 
>> Best regards,
>> Dust
>> 
>I agree on your description above.
>Sure, the following case 1) can also happen
>
>case 1)
>-------
> lock_sock()
> __smc_release
>
> sock_set_flag(DEAD)
> bh_lock_sock()
> smc_cdc_msg_recv_action()
> sock_set_flag(DONE)
> bh_unlock_sock()
> release_sock()
>
>case 2)
>-------
> lock_sock()
> __smc_release
>
> bh_lock_sock()
> smc_cdc_msg_recv_action()
> sock_set_flag(DONE) sock_set_flag(DEAD)
> __set_bit __set_bit
> bh_unlock_sock()
> release_sock()
>
>My point here is that case2) can never happen. i.e that sock_set_flag(DONE)
>and sock_set_flag(DEAD) can not happen concurrently. Thus, how could
>the atomic set help make sure that the Dead flag would not be overwritten
>with DONE?

I agree with you on this. I also don't see how using atomic operations
can solve the problem of overwriting the DEAD flag with DONE.

I think we need some mechanisms to ensure that sk_flags and other
struct sock related fields are not modified simultaneously.

Best regards,
Dust
D. Wythe Oct. 17, 2023, 2 a.m. UTC | #8
On 10/13/23 8:27 PM, Dust Li wrote:
> On Fri, Oct 13, 2023 at 01:52:09PM +0200, Wenjia Zhang wrote:
>>
>> On 13.10.23 07:32, Dust Li wrote:
>>> On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>>>>
>>>> On 12.10.23 04:37, D. Wythe wrote:
>>>>>
>>>>> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>>>>>>
>>>>>> On 11.10.23 09:33, D. Wythe wrote:
>>>>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>>>>
>>>>>>> Considering scenario:
>>>>>>>
>>>>>>>                   smc_cdc_rx_handler_rwwi
>>>>>>> __smc_release
>>>>>>>                   sock_set_flag
>>>>>>> smc_close_active()
>>>>>>> sock_set_flag
>>>>>>>
>>>>>>> __set_bit(DEAD)            __set_bit(DONE)
>>>>>>>
>>>>>>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>>>>>>> if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>>>>>>> in smc_close_passive_work:
>>>>>>>
>>>>>>> if (sock_flag(sk, SOCK_DEAD) &&
>>>>>>>       smc_close_sent_any_close(conn)) {
>>>>>>>       sk->sk_state = SMC_CLOSED;
>>>>>>> } else {
>>>>>>>       /* just shutdown, but not yet closed locally */
>>>>>>>       sk->sk_state = SMC_APPFINCLOSEWAIT;
>>>>>>> }
>>>>>>>
>>>>>>> Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>>>>>>> Since set_bit is atomic.
>>>>>>>
>>>>>> I didn't really understand the scenario. What is
>>>>>> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>>>>>> during the runtime?
>>>>>>
>>>>> Hi Wenjia,
>>>>>
>>>>> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>>>>> smc_cdc_rx_handler();
>>>>>
>>>>> Following is a more specific description of the issues
>>>>>
>>>>>
>>>>> lock_sock()
>>>>> __smc_release
>>>>>
>>>>> smc_cdc_rx_handler()
>>>>> smc_cdc_msg_recv()
>>>>> bh_lock_sock()
>>>>> smc_cdc_msg_recv_action()
>>>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>>>> __set_bit __set_bit
>>>>> bh_unlock_sock()
>>>>> release_sock()
>>>>>
>>>>>
>>>>>
>>>>> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are
>>>>> actually used for different purposes and contexts.
>>>>>
>>>>>
>>>> ok, that's true that |bh_lock_sock|and |lock_sock|are not really mutually
>>>> exclusive. However, since bh_lock_sock() is used, this scenario you described
>>>> above should not happen, because that gets the sk_lock.slock. Following this
>>>> scenarios, IMO, only the following situation can happen.
>>>>
>>>> lock_sock()
>>>> __smc_release
>>>>
>>>> smc_cdc_rx_handler()
>>>> smc_cdc_msg_recv()
>>>> bh_lock_sock()
>>>> smc_cdc_msg_recv_action()
>>>> sock_set_flag(DONE)
>>>> bh_unlock_sock()
>>>> sock_set_flag(DEAD)
>>>> release_sock()
>>> Hi wenjia,
>>>
>>> I think I know what D. Wythe means now, and I think he is right on this.
>>>
>>> IIUC, in process context, lock_sock() won't respect bh_lock_sock() if it
>>> acquires the lock before bh_lock_sock(). This is how the sock lock works.
>>>
>>>       PROCESS CONTEXT                                 INTERRUPT CONTEXT
>>> ------------------------------------------------------------------------
>>> lock_sock()
>>>       spin_lock_bh(&sk->sk_lock.slock);
>>>       ...
>>>       sk->sk_lock.owned = 1;
>>>       // here the spinlock is released
>>>       spin_unlock_bh(&sk->sk_lock.slock);
>>> __smc_release()
>>>                                                      bh_lock_sock(&smc->sk);
>>>                                                      smc_cdc_msg_recv_action(smc, cdc);
>>>                                                          sock_set_flag(&smc->sk, SOCK_DONE);
>>>                                                      bh_unlock_sock(&smc->sk);
>>>
>>>       sock_set_flag(DEAD)  <-- Can be before or after sock_set_flag(DONE)
>>> release_sock()
>>>
>>> The bh_lock_sock() only spins on sk->sk_lock.slock, which is already released
>>> after lock_sock() return. Therefor, there is actually no lock between
>>> the code after lock_sock() and before release_sock() with bh_lock_sock()...bh_unlock_sock().
>>> Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and might be
>>> before or after sock_set_flag(DONE).
>>>
>>>
>>> Actually, in TCP, the interrupt context will check sock_owned_by_user().
>>> If it returns true, the softirq just defer the process to backlog, and process
>>> that in release_sock(). Which avoid the race between softirq and process
>>> when visiting the 'struct sock'.
>>>
>>> tcp_v4_rcv()
>>>            bh_lock_sock_nested(sk);
>>>            tcp_segs_in(tcp_sk(sk), skb);
>>>            ret = 0;
>>>            if (!sock_owned_by_user(sk)) {
>>>                    ret = tcp_v4_do_rcv(sk, skb);
>>>            } else {
>>>                    if (tcp_add_backlog(sk, skb, &drop_reason))
>>>                            goto discard_and_relse;
>>>            }
>>>            bh_unlock_sock(sk);
>>>
>>>
>>> But in SMC we don't have a backlog, that means fields in 'struct sock'
>>> might all have race, and this sock_set_flag() is just one of the cases.
>>>
>>> Best regards,
>>> Dust
>>>
>> I agree on your description above.
>> Sure, the following case 1) can also happen
>>
>> case 1)
>> -------
>> lock_sock()
>> __smc_release
>>
>> sock_set_flag(DEAD)
>> bh_lock_sock()
>> smc_cdc_msg_recv_action()
>> sock_set_flag(DONE)
>> bh_unlock_sock()
>> release_sock()
>>
>> case 2)
>> -------
>> lock_sock()
>> __smc_release
>>
>> bh_lock_sock()
>> smc_cdc_msg_recv_action()
>> sock_set_flag(DONE) sock_set_flag(DEAD)
>> __set_bit __set_bit
>> bh_unlock_sock()
>> release_sock()
>>
>> My point here is that case2) can never happen. i.e that sock_set_flag(DONE)
>> and sock_set_flag(DEAD) can not happen concurrently. Thus, how could
>> the atomic set help make sure that the Dead flag would not be overwritten
>> with DONE?
> I agree with you on this. I also don't see using atomic can
> solve the problem of overwriting the DEAD flag with DONE.
>
> I think we need some mechanisms to ensure that sk_flags and other
> struct sock related fields are not modified simultaneously.
>
> Best regards,
> Dust

It seems that everyone has agreed that case 2 is impossible. I'm a
bit confused about why sock_set_flag(DONE) and sock_set_flag(DEAD)
cannot happen concurrently. What mechanism prevents their parallel
execution?

Best wishes,
D. Wythe

>
Dust Li Oct. 17, 2023, 8:39 a.m. UTC | #9
On Tue, Oct 17, 2023 at 10:00:28AM +0800, D. Wythe wrote:
>
>
>On 10/13/23 8:27 PM, Dust Li wrote:
>> On Fri, Oct 13, 2023 at 01:52:09PM +0200, Wenjia Zhang wrote:
>> > 
>> > On 13.10.23 07:32, Dust Li wrote:
>> > > On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>> > > > 
>> > > > On 12.10.23 04:37, D. Wythe wrote:
>> > > > > 
>> > > > > On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>> > > > > > 
>> > > > > > On 11.10.23 09:33, D. Wythe wrote:
>> > > > > > > From: "D. Wythe" <alibuda@linux.alibaba.com>
>> > > > > > > 
>> > > > > > > Considering scenario:
>> > > > > > > 
>> > > > > > >                   smc_cdc_rx_handler_rwwi
>> > > > > > > __smc_release
>> > > > > > >                   sock_set_flag
>> > > > > > > smc_close_active()
>> > > > > > > sock_set_flag
>> > > > > > > 
>> > > > > > > __set_bit(DEAD)            __set_bit(DONE)
>> > > > > > > 
>> > > > > > > Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>> > > > > > > if the DEAD flag lost, the state SMC_CLOSED  will be never be reached
>> > > > > > > in smc_close_passive_work:
>> > > > > > > 
>> > > > > > > if (sock_flag(sk, SOCK_DEAD) &&
>> > > > > > >       smc_close_sent_any_close(conn)) {
>> > > > > > >       sk->sk_state = SMC_CLOSED;
>> > > > > > > } else {
>> > > > > > >       /* just shutdown, but not yet closed locally */
>> > > > > > >       sk->sk_state = SMC_APPFINCLOSEWAIT;
>> > > > > > > }
>> > > > > > > 
>> > > > > > > Replace sock_set_flags or __set_bit to set_bit will fix this problem.
>> > > > > > > Since set_bit is atomic.
>> > > > > > > 
>> > > > > > I didn't really understand the scenario. What is
>> > > > > > smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>> > > > > > during the runtime?
>> > > > > > 
>> > > > > Hi Wenjia,
>> > > > > 
>> > > > > Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>> > > > > smc_cdc_rx_handler();
>> > > > > 
>> > > > > Following is a more specific description of the issues
>> > > > > 
>> > > > > 
>> > > > > lock_sock()
>> > > > > __smc_release
>> > > > > 
>> > > > > smc_cdc_rx_handler()
>> > > > > smc_cdc_msg_recv()
>> > > > > bh_lock_sock()
>> > > > > smc_cdc_msg_recv_action()
>> > > > > sock_set_flag(DONE) sock_set_flag(DEAD)
>> > > > > __set_bit __set_bit
>> > > > > bh_unlock_sock()
>> > > > > release_sock()
>> > > > > 
>> > > > > 
>> > > > > 
>> > > > > Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. They are
>> > > > > actually used for different purposes and contexts.
>> > > > > 
>> > > > > 
>> > > > ok, that's true that |bh_lock_sock|and |lock_sock|are not really mutually
>> > > > exclusive. However, since bh_lock_sock() is used, this scenario you described
>> > > > above should not happen, because that gets the sk_lock.slock. Following this
>> > > > scenarios, IMO, only the following situation can happen.
>> > > > 
>> > > > lock_sock()
>> > > > __smc_release
>> > > > 
>> > > > smc_cdc_rx_handler()
>> > > > smc_cdc_msg_recv()
>> > > > bh_lock_sock()
>> > > > smc_cdc_msg_recv_action()
>> > > > sock_set_flag(DONE)
>> > > > bh_unlock_sock()
>> > > > sock_set_flag(DEAD)
>> > > > release_sock()
>> > > Hi wenjia,
>> > > 
>> > > I think I know what D. Wythe means now, and I think he is right on this.
>> > > 
>> > > IIUC, in process context, lock_sock() won't respect bh_lock_sock() if it
>> > > acquires the lock before bh_lock_sock(). This is how the sock lock works.
>> > > 
>> > >       PROCESS CONTEXT                                 INTERRUPT CONTEXT
>> > > ------------------------------------------------------------------------
>> > > lock_sock()
>> > >       spin_lock_bh(&sk->sk_lock.slock);
>> > >       ...
>> > >       sk->sk_lock.owned = 1;
>> > >       // here the spinlock is released
>> > >       spin_unlock_bh(&sk->sk_lock.slock);
>> > > __smc_release()
>> > >                                                      bh_lock_sock(&smc->sk);
>> > >                                                      smc_cdc_msg_recv_action(smc, cdc);
>> > >                                                          sock_set_flag(&smc->sk, SOCK_DONE);
>> > >                                                      bh_unlock_sock(&smc->sk);
>> > > 
>> > >       sock_set_flag(DEAD)  <-- Can be before or after sock_set_flag(DONE)
>> > > release_sock()
>> > > 
>> > > The bh_lock_sock() only spins on sk->sk_lock.slock, which is already released
>> > > after lock_sock() return. Therefor, there is actually no lock between
>> > > the code after lock_sock() and before release_sock() with bh_lock_sock()...bh_unlock_sock().
>> > > Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and might be
>> > > before or after sock_set_flag(DONE).
>> > > 
>> > > 
>> > > Actually, in TCP, the interrupt context will check sock_owned_by_user().
>> > > If it returns true, the softirq just defer the process to backlog, and process
>> > > that in release_sock(). Which avoid the race between softirq and process
>> > > when visiting the 'struct sock'.
>> > > 
>> > > tcp_v4_rcv()
>> > >            bh_lock_sock_nested(sk);
>> > >            tcp_segs_in(tcp_sk(sk), skb);
>> > >            ret = 0;
>> > >            if (!sock_owned_by_user(sk)) {
>> > >                    ret = tcp_v4_do_rcv(sk, skb);
>> > >            } else {
>> > >                    if (tcp_add_backlog(sk, skb, &drop_reason))
>> > >                            goto discard_and_relse;
>> > >            }
>> > >            bh_unlock_sock(sk);
>> > > 
>> > > 
>> > > But in SMC we don't have a backlog, that means fields in 'struct sock'
>> > > might all have race, and this sock_set_flag() is just one of the cases.
>> > > 
>> > > Best regards,
>> > > Dust
>> > > 
>> > I agree on your description above.
>> > Sure, the following case 1) can also happen
>> > 
>> > case 1)
>> > -------
>> > lock_sock()
>> > __smc_release
>> > 
>> > sock_set_flag(DEAD)
>> > bh_lock_sock()
>> > smc_cdc_msg_recv_action()
>> > sock_set_flag(DONE)
>> > bh_unlock_sock()
>> > release_sock()
>> > 
>> > case 2)
>> > -------
>> > lock_sock()
>> > __smc_release
>> > 
>> > bh_lock_sock()
>> > smc_cdc_msg_recv_action()
>> > sock_set_flag(DONE) sock_set_flag(DEAD)
>> > __set_bit __set_bit
>> > bh_unlock_sock()
>> > release_sock()
>> > 
>> > My point here is that case2) can never happen. i.e that sock_set_flag(DONE)
>> > and sock_set_flag(DEAD) can not happen concurrently. Thus, how could
>> > the atomic set help make sure that the Dead flag would not be overwritten
>> > with DONE?
>> I agree with you on this. I also don't see using atomic can
>> solve the problem of overwriting the DEAD flag with DONE.
>> 
>> I think we need some mechanisms to ensure that sk_flags and other
>> struct sock related fields are not modified simultaneously.
>> 
>> Best regards,
>> Dust
>
>It seems that everyone has agrees on that case 2 is impossible. I'm a bit
>confused, why that
>sock_set_flag(DONE) and sock_set_flag(DEAD) can not happen concurrently. What
>mechanism
>prevents their parallel execution?

Upon reviewing the code again, I realize that my previous understanding
was incorrect. I mistakenly believed that the DEAD and DONE flags would
overwrite each other, without realizing that sk_flags is actually used
as a bitmap.

So, I think you are right, using atomic will ensure that the DEAD flag is
always set.

Best regards,
Dust
Wenjia Zhang Oct. 17, 2023, 5:03 p.m. UTC | #10
On 17.10.23 04:00, D. Wythe wrote:
> 
> 
> On 10/13/23 8:27 PM, Dust Li wrote:
>> On Fri, Oct 13, 2023 at 01:52:09PM +0200, Wenjia Zhang wrote:
>>>
>>> On 13.10.23 07:32, Dust Li wrote:
>>>> On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>>>>>
>>>>> On 12.10.23 04:37, D. Wythe wrote:
>>>>>>
>>>>>> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>>>>>>>
>>>>>>> On 11.10.23 09:33, D. Wythe wrote:
>>>>>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>>>>>
>>>>>>>> Considering scenario:
>>>>>>>>
>>>>>>>>                   smc_cdc_rx_handler_rwwi
>>>>>>>> __smc_release
>>>>>>>>                   sock_set_flag
>>>>>>>> smc_close_active()
>>>>>>>> sock_set_flag
>>>>>>>>
>>>>>>>> __set_bit(DEAD)            __set_bit(DONE)
>>>>>>>>
>>>>>>>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>>>>>>>> if the DEAD flag lost, the state SMC_CLOSED  will be never be 
>>>>>>>> reached
>>>>>>>> in smc_close_passive_work:
>>>>>>>>
>>>>>>>> if (sock_flag(sk, SOCK_DEAD) &&
>>>>>>>>       smc_close_sent_any_close(conn)) {
>>>>>>>>       sk->sk_state = SMC_CLOSED;
>>>>>>>> } else {
>>>>>>>>       /* just shutdown, but not yet closed locally */
>>>>>>>>       sk->sk_state = SMC_APPFINCLOSEWAIT;
>>>>>>>> }
>>>>>>>>
>>>>>>>> Replace sock_set_flags or __set_bit to set_bit will fix this 
>>>>>>>> problem.
>>>>>>>> Since set_bit is atomic.
>>>>>>>>
>>>>>>> I didn't really understand the scenario. What is
>>>>>>> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>>>>>>> during the runtime?
>>>>>>>
>>>>>> Hi Wenjia,
>>>>>>
>>>>>> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>>>>>> smc_cdc_rx_handler();
>>>>>>
>>>>>> Following is a more specific description of the issues
>>>>>>
>>>>>>
>>>>>> lock_sock()
>>>>>> __smc_release
>>>>>>
>>>>>> smc_cdc_rx_handler()
>>>>>> smc_cdc_msg_recv()
>>>>>> bh_lock_sock()
>>>>>> smc_cdc_msg_recv_action()
>>>>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>>>>> __set_bit __set_bit
>>>>>> bh_unlock_sock()
>>>>>> release_sock()
>>>>>>
>>>>>>
>>>>>>
>>>>>> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. 
>>>>>> They are
>>>>>> actually used for different purposes and contexts.
>>>>>>
>>>>>>
>>>>> ok, that's true that |bh_lock_sock|and |lock_sock|are not really 
>>>>> mutually
>>>>> exclusive. However, since bh_lock_sock() is used, this scenario you 
>>>>> described
>>>>> above should not happen, because that gets the sk_lock.slock. 
>>>>> Following this
>>>>> scenarios, IMO, only the following situation can happen.
>>>>>
>>>>> lock_sock()
>>>>> __smc_release
>>>>>
>>>>> smc_cdc_rx_handler()
>>>>> smc_cdc_msg_recv()
>>>>> bh_lock_sock()
>>>>> smc_cdc_msg_recv_action()
>>>>> sock_set_flag(DONE)
>>>>> bh_unlock_sock()
>>>>> sock_set_flag(DEAD)
>>>>> release_sock()
>>>> Hi wenjia,
>>>>
>>>> I think I know what D. Wythe means now, and I think he is right on 
>>>> this.
>>>>
>>>> IIUC, in process context, lock_sock() won't respect bh_lock_sock() 
>>>> if it
>>>> acquires the lock before bh_lock_sock(). This is how the sock lock 
>>>> works.
>>>>
>>>>       PROCESS CONTEXT                                 INTERRUPT CONTEXT
>>>> ------------------------------------------------------------------------
>>>> lock_sock()
>>>>       spin_lock_bh(&sk->sk_lock.slock);
>>>>       ...
>>>>       sk->sk_lock.owned = 1;
>>>>       // here the spinlock is released
>>>>       spin_unlock_bh(&sk->sk_lock.slock);
>>>> __smc_release()
>>>>                                                      
>>>> bh_lock_sock(&smc->sk);
>>>>                                                      
>>>> smc_cdc_msg_recv_action(smc, cdc);
>>>>                                                          
>>>> sock_set_flag(&smc->sk, SOCK_DONE);
>>>>                                                      
>>>> bh_unlock_sock(&smc->sk);
>>>>
>>>>       sock_set_flag(DEAD)  <-- Can be before or after 
>>>> sock_set_flag(DONE)
>>>> release_sock()
>>>>
>>>> The bh_lock_sock() only spins on sk->sk_lock.slock, which is already 
>>>> released
>>>> after lock_sock() return. Therefor, there is actually no lock between
>>>> the code after lock_sock() and before release_sock() with 
>>>> bh_lock_sock()...bh_unlock_sock().
>>>> Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and 
>>>> might be
>>>> before or after sock_set_flag(DONE).
>>>>
>>>>
>>>> Actually, in TCP, the interrupt context will check 
>>>> sock_owned_by_user().
>>>> If it returns true, the softirq just defer the process to backlog, 
>>>> and process
>>>> that in release_sock(). Which avoid the race between softirq and 
>>>> process
>>>> when visiting the 'struct sock'.
>>>>
>>>> tcp_v4_rcv()
>>>>            bh_lock_sock_nested(sk);
>>>>            tcp_segs_in(tcp_sk(sk), skb);
>>>>            ret = 0;
>>>>            if (!sock_owned_by_user(sk)) {
>>>>                    ret = tcp_v4_do_rcv(sk, skb);
>>>>            } else {
>>>>                    if (tcp_add_backlog(sk, skb, &drop_reason))
>>>>                            goto discard_and_relse;
>>>>            }
>>>>            bh_unlock_sock(sk);
>>>>
>>>>
>>>> But in SMC we don't have a backlog, that means fields in 'struct sock'
>>>> might all have race, and this sock_set_flag() is just one of the cases.
>>>>
>>>> Best regards,
>>>> Dust
>>>>
>>> I agree on your description above.
>>> Sure, the following case 1) can also happen
>>>
>>> case 1)
>>> -------
>>> lock_sock()
>>> __smc_release
>>>
>>> sock_set_flag(DEAD)
>>> bh_lock_sock()
>>> smc_cdc_msg_recv_action()
>>> sock_set_flag(DONE)
>>> bh_unlock_sock()
>>> release_sock()
>>>
>>> case 2)
>>> -------
>>> lock_sock()
>>> __smc_release
>>>
>>> bh_lock_sock()
>>> smc_cdc_msg_recv_action()
>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>> __set_bit __set_bit
>>> bh_unlock_sock()
>>> release_sock()
>>>
>>> My point here is that case2) can never happen. i.e that 
>>> sock_set_flag(DONE)
>>> and sock_set_flag(DEAD) can not happen concurrently. Thus, how could
>>> the atomic set help make sure that the Dead flag would not be 
>>> overwritten
>>> with DONE?
>> I agree with you on this. I also don't see using atomic can
>> solve the problem of overwriting the DEAD flag with DONE.
>>
>> I think we need some mechanisms to ensure that sk_flags and other
>> struct sock related fields are not modified simultaneously.
>>
>> Best regards,
>> Dust
> 
> It seems that everyone has agrees on that case 2 is impossible. I'm a 
> bit confused, why that
> sock_set_flag(DONE) and sock_set_flag(DEAD) can not happen concurrently. 
> What mechanism
> prevents their parallel execution?
> 
> Best wishes,
> D. Wythe
> 
>>
> 
In smc_cdc_rx_handler(), once bh_lock_sock() has been acquired, how could
the sock_set_flag(DEAD) in __smc_release() modify the flag concurrently?
As I said, this could just be a lapse in my thinking, but I still
want to get it clarified.
Wenjia Zhang Oct. 19, 2023, 11:54 a.m. UTC | #11
On 19.10.23 10:09, D. Wythe wrote:
> 
> 
> On 10/18/23 1:03 AM, Wenjia Zhang wrote:
>>
>>
>> On 17.10.23 04:00, D. Wythe wrote:
>>>
>>>
>>> On 10/13/23 8:27 PM, Dust Li wrote:
>>>> On Fri, Oct 13, 2023 at 01:52:09PM +0200, Wenjia Zhang wrote:
>>>>>
>>>>> On 13.10.23 07:32, Dust Li wrote:
>>>>>> On Thu, Oct 12, 2023 at 01:51:54PM +0200, Wenjia Zhang wrote:
>>>>>>>
>>>>>>> On 12.10.23 04:37, D. Wythe wrote:
>>>>>>>>
>>>>>>>> On 10/12/23 4:31 AM, Wenjia Zhang wrote:
>>>>>>>>>
>>>>>>>>> On 11.10.23 09:33, D. Wythe wrote:
>>>>>>>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>>>>>>>
>>>>>>>>>> Considering scenario:
>>>>>>>>>>
>>>>>>>>>>                   smc_cdc_rx_handler_rwwi
>>>>>>>>>> __smc_release
>>>>>>>>>>                   sock_set_flag
>>>>>>>>>> smc_close_active()
>>>>>>>>>> sock_set_flag
>>>>>>>>>>
>>>>>>>>>> __set_bit(DEAD)            __set_bit(DONE)
>>>>>>>>>>
>>>>>>>>>> Dues to __set_bit is not atomic, the DEAD or DONE might be lost.
>>>>>>>>>> if the DEAD flag lost, the state SMC_CLOSED  will be never be 
>>>>>>>>>> reached
>>>>>>>>>> in smc_close_passive_work:
>>>>>>>>>>
>>>>>>>>>> if (sock_flag(sk, SOCK_DEAD) &&
>>>>>>>>>>       smc_close_sent_any_close(conn)) {
>>>>>>>>>>       sk->sk_state = SMC_CLOSED;
>>>>>>>>>> } else {
>>>>>>>>>>       /* just shutdown, but not yet closed locally */
>>>>>>>>>>       sk->sk_state = SMC_APPFINCLOSEWAIT;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> Replace sock_set_flags or __set_bit to set_bit will fix this 
>>>>>>>>>> problem.
>>>>>>>>>> Since set_bit is atomic.
>>>>>>>>>>
>>>>>>>>> I didn't really understand the scenario. What is
>>>>>>>>> smc_cdc_rx_handler_rwwi()? What does it do? Don't it get the lock
>>>>>>>>> during the runtime?
>>>>>>>>>
>>>>>>>> Hi Wenjia,
>>>>>>>>
>>>>>>>> Sorry for that, It is not smc_cdc_rx_handler_rwwi() but
>>>>>>>> smc_cdc_rx_handler();
>>>>>>>>
>>>>>>>> Following is a more specific description of the issues
>>>>>>>>
>>>>>>>>
>>>>>>>> lock_sock()
>>>>>>>> __smc_release
>>>>>>>>
>>>>>>>> smc_cdc_rx_handler()
>>>>>>>> smc_cdc_msg_recv()
>>>>>>>> bh_lock_sock()
>>>>>>>> smc_cdc_msg_recv_action()
>>>>>>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>>>>>>> __set_bit __set_bit
>>>>>>>> bh_unlock_sock()
>>>>>>>> release_sock()
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Note : |bh_lock_sock|and |lock_sock|are not mutually exclusive. 
>>>>>>>> They are
>>>>>>>> actually used for different purposes and contexts.
>>>>>>>>
>>>>>>>>
>>>>>>> ok, that's true that |bh_lock_sock|and |lock_sock|are not really 
>>>>>>> mutually
>>>>>>> exclusive. However, since bh_lock_sock() is used, this scenario 
>>>>>>> you described
>>>>>>> above should not happen, because that gets the sk_lock.slock. 
>>>>>>> Following this
>>>>>>> scenarios, IMO, only the following situation can happen.
>>>>>>>
>>>>>>> lock_sock()
>>>>>>> __smc_release
>>>>>>>
>>>>>>> smc_cdc_rx_handler()
>>>>>>> smc_cdc_msg_recv()
>>>>>>> bh_lock_sock()
>>>>>>> smc_cdc_msg_recv_action()
>>>>>>> sock_set_flag(DONE)
>>>>>>> bh_unlock_sock()
>>>>>>> sock_set_flag(DEAD)
>>>>>>> release_sock()
>>>>>> Hi wenjia,
>>>>>>
>>>>>> I think I know what D. Wythe means now, and I think he is right on 
>>>>>> this.
>>>>>>
>>>>>> IIUC, in process context, lock_sock() won't respect bh_lock_sock() 
>>>>>> if it
>>>>>> acquires the lock before bh_lock_sock(). This is how the sock lock 
>>>>>> works.
>>>>>>
>>>>>>       PROCESS CONTEXT INTERRUPT CONTEXT
>>>>>> ------------------------------------------------------------------------
>>>>>> lock_sock()
>>>>>>       spin_lock_bh(&sk->sk_lock.slock);
>>>>>>       ...
>>>>>>       sk->sk_lock.owned = 1;
>>>>>>       // here the spinlock is released
>>>>>>       spin_unlock_bh(&sk->sk_lock.slock);
>>>>>> __smc_release()
>>>>>> bh_lock_sock(&smc->sk);
>>>>>> smc_cdc_msg_recv_action(smc, cdc);
>>>>>> sock_set_flag(&smc->sk, SOCK_DONE);
>>>>>> bh_unlock_sock(&smc->sk);
>>>>>>
>>>>>>       sock_set_flag(DEAD)  <-- Can be before or after 
>>>>>> sock_set_flag(DONE)
>>>>>> release_sock()
>>>>>>
>>>>>> The bh_lock_sock() only spins on sk->sk_lock.slock, which is 
>>>>>> already released
>>>>>> after lock_sock() return. Therefor, there is actually no lock between
>>>>>> the code after lock_sock() and before release_sock() with 
>>>>>> bh_lock_sock()...bh_unlock_sock().
>>>>>> Thus, sock_set_flag(DEAD) won't respect bh_lock_sock() at all, and 
>>>>>> might be
>>>>>> before or after sock_set_flag(DONE).
>>>>>>
>>>>>>
>>>>>> Actually, in TCP, the interrupt context will check 
>>>>>> sock_owned_by_user().
>>>>>> If it returns true, the softirq just defers the processing to the backlog, 
>>>>>> and processes
>>>>>> that in release_sock(), which avoids the race between softirq and 
>>>>>> process
>>>>>> when visiting the 'struct sock'.
>>>>>>
>>>>>> tcp_v4_rcv()
>>>>>>            bh_lock_sock_nested(sk);
>>>>>>            tcp_segs_in(tcp_sk(sk), skb);
>>>>>>            ret = 0;
>>>>>>            if (!sock_owned_by_user(sk)) {
>>>>>>                    ret = tcp_v4_do_rcv(sk, skb);
>>>>>>            } else {
>>>>>>                    if (tcp_add_backlog(sk, skb, &drop_reason))
>>>>>>                            goto discard_and_relse;
>>>>>>            }
>>>>>>            bh_unlock_sock(sk);
>>>>>>
>>>>>>
>>>>>> But in SMC we don't have a backlog, that means fields in 'struct 
>>>>>> sock'
>>>>>> might all have race, and this sock_set_flag() is just one of the 
>>>>>> cases.
>>>>>>
>>>>>> Best regards,
>>>>>> Dust
>>>>>>
>>>>> I agree on your description above.
>>>>> Sure, the following case 1) can also happen
>>>>>
>>>>> case 1)
>>>>> -------
>>>>> lock_sock()
>>>>> __smc_release
>>>>>
>>>>> sock_set_flag(DEAD)
>>>>> bh_lock_sock()
>>>>> smc_cdc_msg_recv_action()
>>>>> sock_set_flag(DONE)
>>>>> bh_unlock_sock()
>>>>> release_sock()
>>>>>
>>>>> case 2)
>>>>> -------
>>>>> lock_sock()
>>>>> __smc_release
>>>>>
>>>>> bh_lock_sock()
>>>>> smc_cdc_msg_recv_action()
>>>>> sock_set_flag(DONE) sock_set_flag(DEAD)
>>>>> __set_bit __set_bit
>>>>> bh_unlock_sock()
>>>>> release_sock()
>>>>>
>>>>> My point here is that case2) can never happen. i.e that 
>>>>> sock_set_flag(DONE)
>>>>> and sock_set_flag(DEAD) can not happen concurrently. Thus, how could
>>>>> the atomic set help make sure that the Dead flag would not be 
>>>>> overwritten
>>>>> with DONE?
>>>> I agree with you on this. I also don't see using atomic can
>>>> solve the problem of overwriting the DEAD flag with DONE.
>>>>
>>>> I think we need some mechanisms to ensure that sk_flags and other
>>>> struct sock related fields are not modified simultaneously.
>>>>
>>>> Best regards,
>>>> Dust
>>>
>>> It seems that everyone agrees that case 2 is impossible. I'm a 
>>> bit confused about why
>>> sock_set_flag(DONE) and sock_set_flag(DEAD) cannot happen 
>>> concurrently. What mechanism
>>> prevents their parallel execution?
>>>
>>> Best wishes,
>>> D. Wythe
>>>
>>>>
>>>
>> In the smc_cdc_rx_handler(), if bh_lock_sock() is held, how could the 
>> sock_set_flag(DEAD) in the __smc_release() modify the flag 
>> concurrently? As I said, that could just be a lapse in my 
>> thinking, but I still want to clarify it.
> 
> #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
> 
> static inline void lock_sock(struct sock *sk)
> {
>      lock_sock_nested(sk, 0);
> }
> 
> void lock_sock_nested(struct sock *sk, int subclass)
> {
>      might_sleep();
>      spin_lock_bh(&sk->sk_lock.slock);
>      if (sk->sk_lock.owned)
>          __lock_sock(sk);
>      sk->sk_lock.owned = 1;
> 
>      spin_unlock(&sk->sk_lock.slock);   <-- marked: slock is released here
>      /*
>       * The sk_lock has mutex_lock() semantics here:
>       */
>      mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
>      local_bh_enable();
> }
> 
> 
> It seems that you believe bh_lock_sock() will block the execution of 
> __smc_release(), indicating that you think the spin on slock will block 
> the execution of __smc_release().
> So, you assume that __smc_release() must also spin on slock, right?
> 
That is exactly what I mean.

> However, lock_sock() releases the slock before returning. You can see it 
> in code above. In other words, __smc_release() will not spin on slock.
> This means that bh_lock_sock() will not block the execution of 
> __smc_release().
> 
Do you mean that the spin_unlock you marked in the code above is to 
release the socket spin lock from __smc_release()?

> Hope this helps
> 
> Best wishes,
> D. Wythe
> 
> 
> 
> 
>
Wenjia Zhang Oct. 23, 2023, 8:53 p.m. UTC | #12
On 11.10.23 09:33, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> Considering scenario:
> 
> 				smc_cdc_rx_handler_rwwi
> __smc_release
> 				sock_set_flag
> smc_close_active()
> sock_set_flag
> 
> __set_bit(DEAD)			__set_bit(DONE)
> 
> Because __set_bit is not atomic, the DEAD or DONE flag might be lost.
> If the DEAD flag is lost, the state SMC_CLOSED will never be reached
> in smc_close_passive_work:
> 
> if (sock_flag(sk, SOCK_DEAD) &&
> 	smc_close_sent_any_close(conn)) {
> 	sk->sk_state = SMC_CLOSED;
> } else {
> 	/* just shutdown, but not yet closed locally */
> 	sk->sk_state = SMC_APPFINCLOSEWAIT;
> }
> 
> Replacing sock_set_flag() (i.e. __set_bit) with set_bit will fix this problem,
> since set_bit is atomic.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>   net/smc/af_smc.c    | 4 ++--
>   net/smc/smc.h       | 5 +++++
>   net/smc/smc_cdc.c   | 2 +-
>   net/smc/smc_close.c | 2 +-
>   4 files changed, 9 insertions(+), 4 deletions(-)
> 

Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
diff mbox series

Patch

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bacdd97..5ad2a9f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -275,7 +275,7 @@  static int __smc_release(struct smc_sock *smc)
 
 	if (!smc->use_fallback) {
 		rc = smc_close_active(smc);
-		sock_set_flag(sk, SOCK_DEAD);
+		smc_sock_set_flag(sk, SOCK_DEAD);
 		sk->sk_shutdown |= SHUTDOWN_MASK;
 	} else {
 		if (sk->sk_state != SMC_CLOSED) {
@@ -1742,7 +1742,7 @@  static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
 		if (new_clcsock)
 			sock_release(new_clcsock);
 		new_sk->sk_state = SMC_CLOSED;
-		sock_set_flag(new_sk, SOCK_DEAD);
+		smc_sock_set_flag(new_sk, SOCK_DEAD);
 		sock_put(new_sk); /* final */
 		*new_smc = NULL;
 		goto out;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 24745fd..e377980 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -377,4 +377,9 @@  void smc_fill_gid_list(struct smc_link_group *lgr,
 int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
 int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
 
+static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
+{
+	set_bit(flag, &sk->sk_flags);
+}
+
 #endif	/* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 89105e9..01bdb79 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -385,7 +385,7 @@  static void smc_cdc_msg_recv_action(struct smc_sock *smc,
 		smc->sk.sk_shutdown |= RCV_SHUTDOWN;
 		if (smc->clcsock && smc->clcsock->sk)
 			smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
-		sock_set_flag(&smc->sk, SOCK_DONE);
+		smc_sock_set_flag(&smc->sk, SOCK_DONE);
 		sock_hold(&smc->sk); /* sock_put in close_work */
 		if (!queue_work(smc_close_wq, &conn->close_work))
 			sock_put(&smc->sk);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index dbdf03e..449ef45 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -173,7 +173,7 @@  void smc_close_active_abort(struct smc_sock *smc)
 		break;
 	}
 
-	sock_set_flag(sk, SOCK_DEAD);
+	smc_sock_set_flag(sk, SOCK_DEAD);
 	sk->sk_state_change(sk);
 
 	if (release_clcsock) {