diff mbox series

[net] net: ipv4: fix one memleak in __inet_del_ifa()

Message ID 20230905135554.1958156-1-liujian56@huawei.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: ipv4: fix one memleak in __inet_del_ifa() | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1332 this patch: 1332
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1353 this patch: 1353
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1355 this patch: 1355
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Liu Jian Sept. 5, 2023, 1:55 p.m. UTC
I got the below warning when do fuzzing test:
unregister_netdevice: waiting for bond0 to become free. Usage count = 2

It can be repoduced via:

ip link add bond0 type bond
sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
ip addr add 4.117.174.103/0 scope 0x40 dev bond0
ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
ip addr del 4.117.174.103/0 scope 0x40 dev bond0
ip link delete bond0 type bond

In this reproduction test case, an incorrect 'last_prim' is found in
__inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
is lost. The memory of the secondary address is leaked and the reference of
in_device and net_device is leaked.

Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.

Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion")
Signed-off-by: Liu Jian <liujian56@huawei.com>
---
 net/ipv4/devinet.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

Comments

Zhu Yanjun Sept. 5, 2023, 3:57 p.m. UTC | #1
在 2023/9/5 21:55, Liu Jian 写道:
> I got the below warning when do fuzzing test:
> unregister_netdevice: waiting for bond0 to become free. Usage count = 2
> 
> It can be repoduced via:
> 
> ip link add bond0 type bond
> sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
> ip addr add 4.117.174.103/0 scope 0x40 dev bond0
> ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
> ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
> ip addr del 4.117.174.103/0 scope 0x40 dev bond0
> ip link delete bond0 type bond
> 
> In this reproduction test case, an incorrect 'last_prim' is found in
> __inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
> is lost. The memory of the secondary address is leaked and the reference of
> in_device and net_device is leaked.
> 
> Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
> 1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
> 2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.
> 
> Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion")
> Signed-off-by: Liu Jian <liujian56@huawei.com>
> ---
>   net/ipv4/devinet.c | 26 ++++++++++++++++++++------
>   1 file changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index 9cf64ee47dd2..99278f4b58e0 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -355,14 +355,13 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   {
>   	struct in_ifaddr *promote = NULL;
>   	struct in_ifaddr *ifa, *ifa1;
> -	struct in_ifaddr *last_prim;
> +	struct in_ifaddr *last_prim = NULL;
>   	struct in_ifaddr *prev_prom = NULL;
>   	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
>   
>   	ASSERT_RTNL();
>   
>   	ifa1 = rtnl_dereference(*ifap);
> -	last_prim = rtnl_dereference(in_dev->ifa_list);
>   	if (in_dev->dead)
>   		goto no_promotions;
>   
> @@ -371,7 +370,16 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   	 **/
>   
>   	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
> -		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
> +		struct in_ifaddr __rcu **ifap1 = &in_dev->ifa_list;
> +
> +		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {

while ((ifa = rtnl_dereference(*ifap1))) should be enough.

Zhu Yanjun

> +			if (ifa1 == ifa)
> +				break;
> +			last_prim = ifa;
> +			ifap1 = &ifa->ifa_next;
> +		}
> +
> +		ifap1 = &ifa1->ifa_next;
>   
>   		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
>   			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
> @@ -440,9 +448,15 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   
>   			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
>   
> -			last_sec = rtnl_dereference(last_prim->ifa_next);
> -			rcu_assign_pointer(promote->ifa_next, last_sec);
> -			rcu_assign_pointer(last_prim->ifa_next, promote);
> +			if (last_prim) {
> +				last_sec = rtnl_dereference(last_prim->ifa_next);
> +				rcu_assign_pointer(promote->ifa_next, last_sec);
> +				rcu_assign_pointer(last_prim->ifa_next, promote);
> +			} else {
> +				rcu_assign_pointer(promote->ifa_next,
> +						   rtnl_dereference(in_dev->ifa_list));
> +				rcu_assign_pointer(in_dev->ifa_list, promote);
> +			}
>   		}
>   
>   		promote->ifa_flags &= ~IFA_F_SECONDARY;
Julian Anastasov Sept. 5, 2023, 5:20 p.m. UTC | #2
Hello,

On Tue, 5 Sep 2023, Liu Jian wrote:

> I got the below warning when do fuzzing test:
> unregister_netdevice: waiting for bond0 to become free. Usage count = 2
> 
> It can be repoduced via:
> 
> ip link add bond0 type bond
> sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
> ip addr add 4.117.174.103/0 scope 0x40 dev bond0
> ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
> ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
> ip addr del 4.117.174.103/0 scope 0x40 dev bond0
> ip link delete bond0 type bond
> 
> In this reproduction test case, an incorrect 'last_prim' is found in
> __inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
> is lost. The memory of the secondary address is leaked and the reference of
> in_device and net_device is leaked.
> 
> Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
> 1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
> 2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.

	So, the problem is that last_prim initially points to the
first primary address that we are actually removing. Looks like with 
last_prim we try to promote the secondary IP after all primaries with
scope >= our scope, i.e. simulating a new IP insert. As the secondary IPs 
have same scope as their primary, why just not remove the last_prim 
var/code and to insert the promoted secondary at the same place as the 
deleted primary? May be your patch does the same: insert at same pos?

Before deletion:
1. primary1 scope global (to be deleted)
2. primary2 scope global
3. promoted_secondary

After deletion (old way, promote as a new insertion):
1. primary2 scope global   
2. promoted_secondary scope global (inserted as new primary)

After deletion (new way, promote at same place):
1. promoted_secondary scope global (now primary, inserted at same place)
2. primary2 scope global   

	What I mean is to use ifap as last_prim, not tested:

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5deac0517ef7..7c71fa8996bb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,12 @@ static void __inet_del_ifa(struct in_device *in_dev,
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1;
-	struct in_ifaddr *last_prim;
 	struct in_ifaddr *prev_prom = NULL;
 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
 	ifa1 = rtnl_dereference(*ifap);
-	last_prim = rtnl_dereference(in_dev->ifa_list);
 	if (in_dev->dead)
 		goto no_promotions;
 
@@ -374,10 +372,6 @@ static void __inet_del_ifa(struct in_device *in_dev,
 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
 
 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
-			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
-			    ifa1->ifa_scope <= ifa->ifa_scope)
-				last_prim = ifa;
-
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 			    ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
@@ -415,7 +409,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
 no_promotions:
 	/* 2. Unlink it */
 
-	*ifap = ifa1->ifa_next;
+	rcu_assign_pointer(*ifap, rtnl_dereference(ifa1->ifa_next));
 	inet_hash_remove(ifa1);
 
 	/* 3. Announce address deletion */
@@ -440,9 +434,9 @@ static void __inet_del_ifa(struct in_device *in_dev,
 
 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
 
-			last_sec = rtnl_dereference(last_prim->ifa_next);
+			last_sec = rtnl_dereference(*ifap);
 			rcu_assign_pointer(promote->ifa_next, last_sec);
-			rcu_assign_pointer(last_prim->ifa_next, promote);
+			rcu_assign_pointer(*ifap, promote);
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
> 
> Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion")
> Signed-off-by: Liu Jian <liujian56@huawei.com>
> ---
>  net/ipv4/devinet.c | 26 ++++++++++++++++++++------
>  1 file changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index 9cf64ee47dd2..99278f4b58e0 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -355,14 +355,13 @@ static void __inet_del_ifa(struct in_device *in_dev,
>  {
>  	struct in_ifaddr *promote = NULL;
>  	struct in_ifaddr *ifa, *ifa1;
> -	struct in_ifaddr *last_prim;
> +	struct in_ifaddr *last_prim = NULL;
>  	struct in_ifaddr *prev_prom = NULL;
>  	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
>  
>  	ASSERT_RTNL();
>  
>  	ifa1 = rtnl_dereference(*ifap);
> -	last_prim = rtnl_dereference(in_dev->ifa_list);
>  	if (in_dev->dead)
>  		goto no_promotions;
>  
> @@ -371,7 +370,16 @@ static void __inet_del_ifa(struct in_device *in_dev,
>  	 **/
>  
>  	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
> -		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
> +		struct in_ifaddr __rcu **ifap1 = &in_dev->ifa_list;
> +
> +		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
> +			if (ifa1 == ifa)
> +				break;
> +			last_prim = ifa;
> +			ifap1 = &ifa->ifa_next;
> +		}
> +
> +		ifap1 = &ifa1->ifa_next;
>  
>  		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
>  			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
> @@ -440,9 +448,15 @@ static void __inet_del_ifa(struct in_device *in_dev,
>  
>  			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
>  
> -			last_sec = rtnl_dereference(last_prim->ifa_next);
> -			rcu_assign_pointer(promote->ifa_next, last_sec);
> -			rcu_assign_pointer(last_prim->ifa_next, promote);
> +			if (last_prim) {
> +				last_sec = rtnl_dereference(last_prim->ifa_next);
> +				rcu_assign_pointer(promote->ifa_next, last_sec);
> +				rcu_assign_pointer(last_prim->ifa_next, promote);
> +			} else {
> +				rcu_assign_pointer(promote->ifa_next,
> +						   rtnl_dereference(in_dev->ifa_list));
> +				rcu_assign_pointer(in_dev->ifa_list, promote);
> +			}
>  		}
>  
>  		promote->ifa_flags &= ~IFA_F_SECONDARY;
> -- 
> 2.34.1

Regards

--
Julian Anastasov <ja@ssi.bg>
Liu Jian Sept. 6, 2023, 3:39 a.m. UTC | #3
On 2023/9/6 1:20, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Tue, 5 Sep 2023, Liu Jian wrote:
> 
>> I got the below warning when do fuzzing test:
>> unregister_netdevice: waiting for bond0 to become free. Usage count = 2
>>
>> It can be repoduced via:
>>
>> ip link add bond0 type bond
>> sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
>> ip addr add 4.117.174.103/0 scope 0x40 dev bond0
>> ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
>> ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
>> ip addr del 4.117.174.103/0 scope 0x40 dev bond0
>> ip link delete bond0 type bond
>>
>> In this reproduction test case, an incorrect 'last_prim' is found in
>> __inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
>> is lost. The memory of the secondary address is leaked and the reference of
>> in_device and net_device is leaked.
>>
>> Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
>> 1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
>> 2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.
> 
> 	So, the problem is that last_prim initially points to the
> first primary address that we are actually removing. Looks like with
> last_prim we try to promote the secondary IP after all primaries with
> scope >= our scope, i.e. simulating a new IP insert. As the secondary IPs
> have same scope as their primary, why just not remove the last_prim
> var/code and to insert the promoted secondary at the same place as the
> deleted primary? May be your patch does the same: insert at same pos?
> 
> Before deletion:
> 1. primary1 scope global (to be deleted)
> 2. primary2 scope global
> 3. promoted_secondary
> 
> After deletion (old way, promote as a new insertion):
> 1. primary2 scope global
> 2. promoted_secondary scope global (inserted as new primary)
> 
It is :
After deletion (old way, promoted_secondary lost):
1. primary2 scope global


> After deletion (new way, promote at same place):
> 1. promoted_secondary scope global (now primary, inserted at same place)
> 2. primary2 scope global
> 
> 	What I mean is to use ifap as last_prim, not tested:
> 
Yes, This is better and it can work also. Thanks.
Tested-by: Liu Jian <liujian56@huawei.com>

> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index 5deac0517ef7..7c71fa8996bb 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -355,14 +355,12 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   {
>   	struct in_ifaddr *promote = NULL;
>   	struct in_ifaddr *ifa, *ifa1;
> -	struct in_ifaddr *last_prim;
>   	struct in_ifaddr *prev_prom = NULL;
>   	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
>   
>   	ASSERT_RTNL();
>   
>   	ifa1 = rtnl_dereference(*ifap);
> -	last_prim = rtnl_dereference(in_dev->ifa_list);
>   	if (in_dev->dead)
>   		goto no_promotions;
>   
> @@ -374,10 +372,6 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
>   
>   		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
> -			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
> -			    ifa1->ifa_scope <= ifa->ifa_scope)
> -				last_prim = ifa;
> -
>   			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
>   			    ifa1->ifa_mask != ifa->ifa_mask ||
>   			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
> @@ -415,7 +409,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   no_promotions:
>   	/* 2. Unlink it */
>   
> -	*ifap = ifa1->ifa_next;
> +	rcu_assign_pointer(*ifap, rtnl_dereference(ifa1->ifa_next));
>   	inet_hash_remove(ifa1);
>   
>   	/* 3. Announce address deletion */
> @@ -440,9 +434,9 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   
>   			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
>   
> -			last_sec = rtnl_dereference(last_prim->ifa_next);
> +			last_sec = rtnl_dereference(*ifap);
>   			rcu_assign_pointer(promote->ifa_next, last_sec);
> -			rcu_assign_pointer(last_prim->ifa_next, promote);
> +			rcu_assign_pointer(*ifap, promote);
>   		}
>   
>   		promote->ifa_flags &= ~IFA_F_SECONDARY;
>>
>> Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion")
>> Signed-off-by: Liu Jian <liujian56@huawei.com>
>> ---
>>   net/ipv4/devinet.c | 26 ++++++++++++++++++++------
>>   1 file changed, 20 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>> index 9cf64ee47dd2..99278f4b58e0 100644
>> --- a/net/ipv4/devinet.c
>> +++ b/net/ipv4/devinet.c
>> @@ -355,14 +355,13 @@ static void __inet_del_ifa(struct in_device *in_dev,
>>   {
>>   	struct in_ifaddr *promote = NULL;
>>   	struct in_ifaddr *ifa, *ifa1;
>> -	struct in_ifaddr *last_prim;
>> +	struct in_ifaddr *last_prim = NULL;
>>   	struct in_ifaddr *prev_prom = NULL;
>>   	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
>>   
>>   	ASSERT_RTNL();
>>   
>>   	ifa1 = rtnl_dereference(*ifap);
>> -	last_prim = rtnl_dereference(in_dev->ifa_list);
>>   	if (in_dev->dead)
>>   		goto no_promotions;
>>   
>> @@ -371,7 +370,16 @@ static void __inet_del_ifa(struct in_device *in_dev,
>>   	 **/
>>   
>>   	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
>> -		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
>> +		struct in_ifaddr __rcu **ifap1 = &in_dev->ifa_list;
>> +
>> +		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
>> +			if (ifa1 == ifa)
>> +				break;
>> +			last_prim = ifa;
>> +			ifap1 = &ifa->ifa_next;
>> +		}
>> +
>> +		ifap1 = &ifa1->ifa_next;
>>   
>>   		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
>>   			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
>> @@ -440,9 +448,15 @@ static void __inet_del_ifa(struct in_device *in_dev,
>>   
>>   			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
>>   
>> -			last_sec = rtnl_dereference(last_prim->ifa_next);
>> -			rcu_assign_pointer(promote->ifa_next, last_sec);
>> -			rcu_assign_pointer(last_prim->ifa_next, promote);
>> +			if (last_prim) {
>> +				last_sec = rtnl_dereference(last_prim->ifa_next);
>> +				rcu_assign_pointer(promote->ifa_next, last_sec);
>> +				rcu_assign_pointer(last_prim->ifa_next, promote);
>> +			} else {
>> +				rcu_assign_pointer(promote->ifa_next,
>> +						   rtnl_dereference(in_dev->ifa_list));
>> +				rcu_assign_pointer(in_dev->ifa_list, promote);
>> +			}
>>   		}
>>   
>>   		promote->ifa_flags &= ~IFA_F_SECONDARY;
>> -- 
>> 2.34.1
> 
> Regards
> 
> --
> Julian Anastasov <ja@ssi.bg>
>
Julian Anastasov Sept. 6, 2023, 10:30 a.m. UTC | #4
Hello,

On Wed, 6 Sep 2023, liujian (CE) wrote:

> On 2023/9/6 1:20, Julian Anastasov wrote:
> > 
> > On Tue, 5 Sep 2023, Liu Jian wrote:
> > 
> >> I got the below warning when do fuzzing test:
> >> unregister_netdevice: waiting for bond0 to become free. Usage count = 2
> >>
> >> It can be repoduced via:
> >>
> >> ip link add bond0 type bond
> >> sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
> >> ip addr add 4.117.174.103/0 scope 0x40 dev bond0
> >> ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
> >> ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
> >> ip addr del 4.117.174.103/0 scope 0x40 dev bond0
> >> ip link delete bond0 type bond
> >>
> >> In this reproduction test case, an incorrect 'last_prim' is found in
> >> __inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
> >> is lost. The memory of the secondary address is leaked and the reference of
> >> in_device and net_device is leaked.

	We can also explain that the problem occurs when we delete
the first primary address and the promoted address is leaked because
it is attached to the to-be-freed primary address instead of to ifa_list.

> >> Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
> >> 1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
> >> 2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.
> > 
> > 	So, the problem is that last_prim initially points to the
> > first primary address that we are actually removing. Looks like with
> > last_prim we try to promote the secondary IP after all primaries with
> > scope >= our scope, i.e. simulating a new IP insert. As the secondary IPs
> > have same scope as their primary, why just not remove the last_prim
> > var/code and to insert the promoted secondary at the same place as the
> > deleted primary? May be your patch does the same: insert at same pos?
> > 
> > Before deletion:
> > 1. primary1 scope global (to be deleted)
> > 2. primary2 scope global
> > 3. promoted_secondary
> > 
> > After deletion (old way, promote as a new insertion):
> > 1. primary2 scope global
> > 2. promoted_secondary scope global (inserted as new primary)
> > 
> It is :
> After deletion (old way, promoted_secondary lost):
> 1. primary2 scope global

	Yes, that is what happens :)

> > After deletion (new way, promote at same place):
> > 1. promoted_secondary scope global (now primary, inserted at same place)
> > 2. primary2 scope global
> > 
> >  What I mean is to use ifap as last_prim, not tested:
> > 
> Yes, This is better and it can work also. Thanks.
> Tested-by: Liu Jian <liujian56@huawei.com>

	But let me propose another version. It is a minimal
bugfix that does not change the place where the promoted address
is added and just converts last_prim to be rcu ptr to insert
position. last_prim will start from ifap because the promoted
address can not be added before this position. It is better
not to reorder the IPs because scripts may depend on the
old behavior to add the promoted IP after all others for
the scope. If you find this version better you can post it
as v2 and I'll sign it too.

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5deac0517ef7..37be82496322 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1;
-	struct in_ifaddr *last_prim;
+	struct in_ifaddr __rcu **last_prim;
 	struct in_ifaddr *prev_prom = NULL;
 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
 	ifa1 = rtnl_dereference(*ifap);
-	last_prim = rtnl_dereference(in_dev->ifa_list);
+	last_prim = ifap;
 	if (in_dev->dead)
 		goto no_promotions;
 
@@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 			    ifa1->ifa_scope <= ifa->ifa_scope)
-				last_prim = ifa;
+				last_prim = &ifa->ifa_next;
 
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 			    ifa1->ifa_mask != ifa->ifa_mask ||
@@ -440,9 +440,9 @@ static void __inet_del_ifa(struct in_device *in_dev,
 
 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
 
-			last_sec = rtnl_dereference(last_prim->ifa_next);
+			last_sec = rtnl_dereference(*last_prim);
 			rcu_assign_pointer(promote->ifa_next, last_sec);
-			rcu_assign_pointer(last_prim->ifa_next, promote);
+			rcu_assign_pointer(*last_prim, promote);
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;

Regards

--
Julian Anastasov <ja@ssi.bg>
Liu Jian Sept. 7, 2023, 2:54 a.m. UTC | #5
On 2023/9/6 18:30, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Wed, 6 Sep 2023, liujian (CE) wrote:
> 
>> On 2023/9/6 1:20, Julian Anastasov wrote:
>>>
>>> On Tue, 5 Sep 2023, Liu Jian wrote:
>>>
>>>> I got the below warning when do fuzzing test:
>>>> unregister_netdevice: waiting for bond0 to become free. Usage count = 2
>>>>
>>>> It can be repoduced via:
>>>>
>>>> ip link add bond0 type bond
>>>> sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
>>>> ip addr add 4.117.174.103/0 scope 0x40 dev bond0
>>>> ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
>>>> ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
>>>> ip addr del 4.117.174.103/0 scope 0x40 dev bond0
>>>> ip link delete bond0 type bond
>>>>
>>>> In this reproduction test case, an incorrect 'last_prim' is found in
>>>> __inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
>>>> is lost. The memory of the secondary address is leaked and the reference of
>>>> in_device and net_device is leaked.
> 
> 	We can also explain that the problem occurs when we delete
> the first primary address and the promoted address is leaked because
> it is attached to the to-be-freed primary address instead of to ifa_list.
> 
>>>> Fix this problem by modifying the PROMOTE_SECONDANCE behavior as follows:
>>>> 1. Traverse in_dev->ifa_list to search for the actual 'last_prim'.
>>>> 2. When last_prim is empty, move 'promote' to the in_dev->ifa_list header.
>>>
>>> 	So, the problem is that last_prim initially points to the
>>> first primary address that we are actually removing. Looks like with
>>> last_prim we try to promote the secondary IP after all primaries with
>>> scope >= our scope, i.e. simulating a new IP insert. As the secondary IPs
>>> have same scope as their primary, why just not remove the last_prim
>>> var/code and to insert the promoted secondary at the same place as the
>>> deleted primary? May be your patch does the same: insert at same pos?
>>>
>>> Before deletion:
>>> 1. primary1 scope global (to be deleted)
>>> 2. primary2 scope global
>>> 3. promoted_secondary
>>>
>>> After deletion (old way, promote as a new insertion):
>>> 1. primary2 scope global
>>> 2. promoted_secondary scope global (inserted as new primary)
>>>
>> It is :
>> After deletion (old way, promoted_secondary lost):
>> 1. primary2 scope global
> 
> 	Yes, that is what happens :)
> 
>>> After deletion (new way, promote at same place):
>>> 1. promoted_secondary scope global (now primary, inserted at same place)
>>> 2. primary2 scope global
>>>
>>>   What I mean is to use ifap as last_prim, not tested:
>>>
>> Yes, This is better and it can work also. Thanks.
>> Tested-by: Liu Jian <liujian56@huawei.com>
> 
> 	But let me propose another version. It is a minimal
> bugfix that does not change the place where the promoted address
> is added and just converts last_prim to be rcu ptr to insert
> position. last_prim will start from ifap because the promoted
> address can not be added before this position. It is better
> not to reorder the IPs because scripts may depend on the
> old behavior to add the promoted IP after all others for
> the scope. If you find this version better you can post it
> as v2 and I'll sign it too.
Have sent v2, thank you for this case.
> 
> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index 5deac0517ef7..37be82496322 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   {
>   	struct in_ifaddr *promote = NULL;
>   	struct in_ifaddr *ifa, *ifa1;
> -	struct in_ifaddr *last_prim;
> +	struct in_ifaddr __rcu **last_prim;
>   	struct in_ifaddr *prev_prom = NULL;
>   	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
>   
>   	ASSERT_RTNL();
>   
>   	ifa1 = rtnl_dereference(*ifap);
> -	last_prim = rtnl_dereference(in_dev->ifa_list);
> +	last_prim = ifap;
>   	if (in_dev->dead)
>   		goto no_promotions;
>   
> @@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
>   			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
>   			    ifa1->ifa_scope <= ifa->ifa_scope)
> -				last_prim = ifa;
> +				last_prim = &ifa->ifa_next;
>   
>   			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
>   			    ifa1->ifa_mask != ifa->ifa_mask ||
> @@ -440,9 +440,9 @@ static void __inet_del_ifa(struct in_device *in_dev,
>   
>   			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
>   
> -			last_sec = rtnl_dereference(last_prim->ifa_next);
> +			last_sec = rtnl_dereference(*last_prim);
>   			rcu_assign_pointer(promote->ifa_next, last_sec);
> -			rcu_assign_pointer(last_prim->ifa_next, promote);
> +			rcu_assign_pointer(*last_prim, promote);
>   		}
>   
>   		promote->ifa_flags &= ~IFA_F_SECONDARY;
> 
> Regards
> 
> --
> Julian Anastasov <ja@ssi.bg>
>
diff mbox series

Patch

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9cf64ee47dd2..99278f4b58e0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,13 @@  static void __inet_del_ifa(struct in_device *in_dev,
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1;
-	struct in_ifaddr *last_prim;
+	struct in_ifaddr *last_prim = NULL;
 	struct in_ifaddr *prev_prom = NULL;
 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
 	ifa1 = rtnl_dereference(*ifap);
-	last_prim = rtnl_dereference(in_dev->ifa_list);
 	if (in_dev->dead)
 		goto no_promotions;
 
@@ -371,7 +370,16 @@  static void __inet_del_ifa(struct in_device *in_dev,
 	 **/
 
 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
-		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
+		struct in_ifaddr __rcu **ifap1 = &in_dev->ifa_list;
+
+		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
+			if (ifa1 == ifa)
+				break;
+			last_prim = ifa;
+			ifap1 = &ifa->ifa_next;
+		}
+
+		ifap1 = &ifa1->ifa_next;
 
 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
@@ -440,9 +448,15 @@  static void __inet_del_ifa(struct in_device *in_dev,
 
 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
 
-			last_sec = rtnl_dereference(last_prim->ifa_next);
-			rcu_assign_pointer(promote->ifa_next, last_sec);
-			rcu_assign_pointer(last_prim->ifa_next, promote);
+			if (last_prim) {
+				last_sec = rtnl_dereference(last_prim->ifa_next);
+				rcu_assign_pointer(promote->ifa_next, last_sec);
+				rcu_assign_pointer(last_prim->ifa_next, promote);
+			} else {
+				rcu_assign_pointer(promote->ifa_next,
+						   rtnl_dereference(in_dev->ifa_list));
+				rcu_assign_pointer(in_dev->ifa_list, promote);
+			}
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;