diff mbox series

[RFC,net] net: ipv6: make fib6_nh_init properly clean after itself on error

Message ID 20211129141151.490533-1-razor@blackwall.org (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series [RFC,net] net: ipv6: make fib6_nh_init properly clean after itself on error | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 15 this patch: 17
netdev/cc_maintainers fail 2 blamed authors not CCed: idosch@mellanox.com dsahern@kernel.org; 3 maintainers not CCed: yoshfuji@linux-ipv6.org idosch@mellanox.com dsahern@kernel.org
netdev/build_clang fail Errors and warnings before: 20 this patch: 22
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn fail Errors and warnings before: 6 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 57 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nikolay Aleksandrov Nov. 29, 2021, 2:11 p.m. UTC
From: Nikolay Aleksandrov <nikolay@nvidia.com>

Currently we have different cleanup expectations by different users of
fib6_nh_init:
 1. nh_create_ipv6
 - calls fib6_nh_release manually which does full cleanup

 2. ip6_route_info_create
 - calls fib6_info_release to drop refs to 0 and schedules rcu call
   for fib6_info_destroy_rcu() which also does full cleanup

 3. fib_check_nh_v6_gw
 - doesn't do any cleanup on error, expects fib6_nh_init to clean up
   after itself fully (nhc_pcpu_rth_output per-cpu memory leak on error)

We can alter fib6_nh_init to properly cleanup after itself so
expectations would be the same for everyone and noone would have to do
anything in such case. It is safe because the route is not inserted yet
so the fib6_nh should not be visible at fib6_nh_init point, thus it
should be possible to free up all resources in its error path. The
problems (and leaks) are because it doesn't free all resources in its
error path, the nhc_pcpu_rth_output per-cpu allocation done by
fib_nh_common_init is not cleaned up and it expects its callers to clean
up if an error occurred after it, e.g. the dst per-cpu allocation
might fail. This change allows us to fix the memory leak at 3. and also
to simplify nh_create_ipv6 and remove the special error handling.

Fixes: 717a8f5b2923 ("ipv4: Add fib_check_nh_v6_gw")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
---
Sending as RFC to see what people think. I've tested this patch under
heavy load with replacing/traffic forwarding/nexthop add/del etc.
I've also tested error paths by adding artificial ENOMEM errors in
different fib6_nh_init stages while running kmemleak.

 net/ipv4/nexthop.c |  8 +-------
 net/ipv6/route.c   | 15 +++++++++------
 2 files changed, 10 insertions(+), 13 deletions(-)

Comments

Ido Schimmel Nov. 30, 2021, 12:40 p.m. UTC | #1
On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
> index 5dbd4b5505eb..a7debafe8b90 100644
> --- a/net/ipv4/nexthop.c
> +++ b/net/ipv4/nexthop.c
> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>  	/* sets nh_dev if successful */
>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>  				      extack);
> -	if (err) {
> -		/* IPv6 is not enabled, don't call fib6_nh_release */
> -		if (err == -EAFNOSUPPORT)
> -			goto out;
> -		ipv6_stub->fib6_nh_release(fib6_nh);
> -	} else {
> +	if (!err)
>  		nh->nh_flags = fib6_nh->fib_nh_flags;
> -	}
>  out:
>  	return err;
>  }

This hunk looks good

> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 42d60c76d30a..2107b13cc9ab 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>  		in6_dev_put(idev);
>  
>  	if (err) {
> -		lwtstate_put(fib6_nh->fib_nh_lws);
> +		/* check if we failed after fib_nh_common_init() was called */
> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
> +			fib_nh_common_release(&fib6_nh->nh_common);
>  		fib6_nh->fib_nh_lws = NULL;
>  		dev_put(dev);
>  	}

Likewise

> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>  	} else {
>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>  		if (err)
> -			goto out;
> +			goto out_free;
>  
>  		fib6_nh = rt->fib6_nh;
>  
> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>  			err = -EINVAL;
> -			goto out;
> +			goto out_free;
>  		}
>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>  		rt->fib6_prefsrc.plen = 128;
> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>  		rt->fib6_prefsrc.plen = 0;
>  
>  	return rt;
> -out:
> -	fib6_info_release(rt);
> -	return ERR_PTR(err);
> +
>  out_free:
>  	ip_fib_metrics_put(rt->fib6_metrics);
> +	if (rt->nh)
> +		nexthop_put(rt->nh);

Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
called after ip_fib_metrics_init() ?

Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
and we failed later?

>  	kfree(rt);
> +out:
>  	return ERR_PTR(err);
>  }
>  
> -- 
> 2.31.1
>
Nikolay Aleksandrov Nov. 30, 2021, 12:48 p.m. UTC | #2
On 30/11/2021 14:40, Ido Schimmel wrote:
> On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
>> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
>> index 5dbd4b5505eb..a7debafe8b90 100644
>> --- a/net/ipv4/nexthop.c
>> +++ b/net/ipv4/nexthop.c
>> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>>  	/* sets nh_dev if successful */
>>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>>  				      extack);
>> -	if (err) {
>> -		/* IPv6 is not enabled, don't call fib6_nh_release */
>> -		if (err == -EAFNOSUPPORT)
>> -			goto out;
>> -		ipv6_stub->fib6_nh_release(fib6_nh);
>> -	} else {
>> +	if (!err)
>>  		nh->nh_flags = fib6_nh->fib_nh_flags;
>> -	}
>>  out:
>>  	return err;
>>  }
> 
> This hunk looks good
> 
>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>> index 42d60c76d30a..2107b13cc9ab 100644
>> --- a/net/ipv6/route.c
>> +++ b/net/ipv6/route.c
>> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>>  		in6_dev_put(idev);
>>  
>>  	if (err) {
>> -		lwtstate_put(fib6_nh->fib_nh_lws);
>> +		/* check if we failed after fib_nh_common_init() was called */
>> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
>> +			fib_nh_common_release(&fib6_nh->nh_common);
>>  		fib6_nh->fib_nh_lws = NULL;
>>  		dev_put(dev);
>>  	}
> 
> Likewise
> 
>> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  	} else {
>>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>>  		if (err)
>> -			goto out;
>> +			goto out_free;
>>  
>>  		fib6_nh = rt->fib6_nh;
>>  
>> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>>  			err = -EINVAL;
>> -			goto out;
>> +			goto out_free;
>>  		}
>>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>>  		rt->fib6_prefsrc.plen = 128;
>> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  		rt->fib6_prefsrc.plen = 0;
>>  
>>  	return rt;
>> -out:
>> -	fib6_info_release(rt);
>> -	return ERR_PTR(err);
>> +
>>  out_free:
>>  	ip_fib_metrics_put(rt->fib6_metrics);
>> +	if (rt->nh)
>> +		nexthop_put(rt->nh);
> 
> Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
> called after ip_fib_metrics_init() ?
> 

yeah, that's ok for symmetry

> Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
> and we failed later?
> 

Hmm, that's a clear bug. I was only looking at nexthop objects and completely
missed that there's an error possibility after the non-nexthop path.
You're correct, and in fact we have to add another error label specifically for
the non-nexthop case because we shouldn't do it in the nexthop case.

>>  	kfree(rt);
>> +out:
>>  	return ERR_PTR(err);
>>  }
>>  
>> -- 
>> 2.31.1
>>
David Ahern Nov. 30, 2021, 4:01 p.m. UTC | #3
On 11/30/21 5:40 AM, Ido Schimmel wrote:
> On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
>> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
>> index 5dbd4b5505eb..a7debafe8b90 100644
>> --- a/net/ipv4/nexthop.c
>> +++ b/net/ipv4/nexthop.c
>> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>>  	/* sets nh_dev if successful */
>>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>>  				      extack);
>> -	if (err) {
>> -		/* IPv6 is not enabled, don't call fib6_nh_release */
>> -		if (err == -EAFNOSUPPORT)
>> -			goto out;
>> -		ipv6_stub->fib6_nh_release(fib6_nh);
>> -	} else {
>> +	if (!err)
>>  		nh->nh_flags = fib6_nh->fib_nh_flags;
>> -	}
>>  out:
>>  	return err;
>>  }
> 
> This hunk looks good

agreed, but it should be a no-op now so this should be a net-next
cleanup patch.

> 
>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>> index 42d60c76d30a..2107b13cc9ab 100644
>> --- a/net/ipv6/route.c
>> +++ b/net/ipv6/route.c
>> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>>  		in6_dev_put(idev);
>>  
>>  	if (err) {
>> -		lwtstate_put(fib6_nh->fib_nh_lws);
>> +		/* check if we failed after fib_nh_common_init() was called */
>> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
>> +			fib_nh_common_release(&fib6_nh->nh_common);
>>  		fib6_nh->fib_nh_lws = NULL;
>>  		dev_put(dev);
>>  	}
> 
> Likewise

this is a leak in the current code and should go through -net as a
separate patch.

> 
>> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  	} else {
>>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>>  		if (err)
>> -			goto out;
>> +			goto out_free;
>>  
>>  		fib6_nh = rt->fib6_nh;
>>  
>> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>>  			err = -EINVAL;
>> -			goto out;
>> +			goto out_free;
>>  		}
>>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>>  		rt->fib6_prefsrc.plen = 128;
>> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>  		rt->fib6_prefsrc.plen = 0;
>>  
>>  	return rt;
>> -out:
>> -	fib6_info_release(rt);
>> -	return ERR_PTR(err);
>> +
>>  out_free:
>>  	ip_fib_metrics_put(rt->fib6_metrics);
>> +	if (rt->nh)
>> +		nexthop_put(rt->nh);
> 
> Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
> called after ip_fib_metrics_init() ?
> 
> Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
> and we failed later?

similarly I think this cleanup is a separate patch.


> 
>>  	kfree(rt);
>> +out:
>>  	return ERR_PTR(err);
>>  }
>>  
>> -- 
>> 2.31.1
>>
Nikolay Aleksandrov Nov. 30, 2021, 4:45 p.m. UTC | #4
On 30/11/2021 18:01, David Ahern wrote:
> On 11/30/21 5:40 AM, Ido Schimmel wrote:
>> On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
>>> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
>>> index 5dbd4b5505eb..a7debafe8b90 100644
>>> --- a/net/ipv4/nexthop.c
>>> +++ b/net/ipv4/nexthop.c
>>> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>>>  	/* sets nh_dev if successful */
>>>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>>>  				      extack);
>>> -	if (err) {
>>> -		/* IPv6 is not enabled, don't call fib6_nh_release */
>>> -		if (err == -EAFNOSUPPORT)
>>> -			goto out;
>>> -		ipv6_stub->fib6_nh_release(fib6_nh);
>>> -	} else {
>>> +	if (!err)
>>>  		nh->nh_flags = fib6_nh->fib_nh_flags;
>>> -	}
>>>  out:
>>>  	return err;
>>>  }
>>
>> This hunk looks good
> 
> agreed, but it should be a no-op now so this should be a net-next
> cleanup patch.
> 

Actually it is needed, it's not a cleanup or noop. If fib6_nh_init fails after fib_nh_common_init
in the per-cpu allocation then fib6_nh->nh_common's pointers will still be there but
freed, so it will lead to double free. We have to NULL them when freeing if we want to avoid that.

>>
>>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>>> index 42d60c76d30a..2107b13cc9ab 100644
>>> --- a/net/ipv6/route.c
>>> +++ b/net/ipv6/route.c
>>> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>>>  		in6_dev_put(idev);
>>>  
>>>  	if (err) {
>>> -		lwtstate_put(fib6_nh->fib_nh_lws);
>>> +		/* check if we failed after fib_nh_common_init() was called */
>>> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
>>> +			fib_nh_common_release(&fib6_nh->nh_common);
>>>  		fib6_nh->fib_nh_lws = NULL;
>>>  		dev_put(dev);
>>>  	}
>>
>> Likewise
> 
> this is a leak in the current code and should go through -net as a
> separate patch.
> 

Yep, this is the point of this patch. :)

>>
>>> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>  	} else {
>>>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>>>  		if (err)
>>> -			goto out;
>>> +			goto out_free;
>>>  
>>>  		fib6_nh = rt->fib6_nh;
>>>  
>>> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>>>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>>>  			err = -EINVAL;
>>> -			goto out;
>>> +			goto out_free;
>>>  		}
>>>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>>>  		rt->fib6_prefsrc.plen = 128;
>>> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>  		rt->fib6_prefsrc.plen = 0;
>>>  
>>>  	return rt;
>>> -out:
>>> -	fib6_info_release(rt);
>>> -	return ERR_PTR(err);
>>> +
>>>  out_free:
>>>  	ip_fib_metrics_put(rt->fib6_metrics);
>>> +	if (rt->nh)
>>> +		nexthop_put(rt->nh);
>>
>> Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
>> called after ip_fib_metrics_init() ?
>>
>> Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
>> and we failed later?
> 
> similarly I think this cleanup is a separate patch.
> 

Same thing, fib6_info_destroy_rcu -> fib6_nh_release would double-free the nh_common parts
if fib6_nh_init fails in the per-cpu allocation after fib_nh_common_init.
It is not a cleanup, but a result of the fix. If we want to keep it, we'll have to NULL
the nh_common parts when freeing them in fib_nh_common_release().

> 
>>
>>>  	kfree(rt);
>>> +out:
>>>  	return ERR_PTR(err);
>>>  }
>>>  
>>> -- 
>>> 2.31.1
>>>
>
David Ahern Nov. 30, 2021, 5:18 p.m. UTC | #5
On 11/30/21 9:45 AM, Nikolay Aleksandrov wrote:
> On 30/11/2021 18:01, David Ahern wrote:
>> On 11/30/21 5:40 AM, Ido Schimmel wrote:
>>> On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
>>>> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
>>>> index 5dbd4b5505eb..a7debafe8b90 100644
>>>> --- a/net/ipv4/nexthop.c
>>>> +++ b/net/ipv4/nexthop.c
>>>> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>>>>  	/* sets nh_dev if successful */
>>>>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>>>>  				      extack);
>>>> -	if (err) {
>>>> -		/* IPv6 is not enabled, don't call fib6_nh_release */
>>>> -		if (err == -EAFNOSUPPORT)
>>>> -			goto out;
>>>> -		ipv6_stub->fib6_nh_release(fib6_nh);
>>>> -	} else {
>>>> +	if (!err)
>>>>  		nh->nh_flags = fib6_nh->fib_nh_flags;
>>>> -	}
>>>>  out:
>>>>  	return err;
>>>>  }
>>>
>>> This hunk looks good
>>
>> agreed, but it should be a no-op now so this should be a net-next
>> cleanup patch.
>>
> 
> Actually it is needed, it's not a cleanup or noop. If fib6_nh_init fails after fib_nh_common_init
> in the per-cpu allocation then fib6_nh->nh_common's pointers will still be there but
> freed, so it will lead to double free. We have to NULL them when freeing if we want to avoid that.

fib6_nh_init should do proper cleanup if it hits an error. Your bug fix
to get nhc_pcpu_rth_output freed should complete that. It can also set
the value to NULL to avoid double free on any code path.


> 
>>>
>>>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>>>> index 42d60c76d30a..2107b13cc9ab 100644
>>>> --- a/net/ipv6/route.c
>>>> +++ b/net/ipv6/route.c
>>>> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>>>>  		in6_dev_put(idev);
>>>>  
>>>>  	if (err) {
>>>> -		lwtstate_put(fib6_nh->fib_nh_lws);
>>>> +		/* check if we failed after fib_nh_common_init() was called */
>>>> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
>>>> +			fib_nh_common_release(&fib6_nh->nh_common);
>>>>  		fib6_nh->fib_nh_lws = NULL;
>>>>  		dev_put(dev);
>>>>  	}
>>>
>>> Likewise
>>
>> this is a leak in the current code and should go through -net as a
>> separate patch.
>>
> 
> Yep, this is the point of this patch. :)
> 
>>>
>>>> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>  	} else {
>>>>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>>>>  		if (err)
>>>> -			goto out;
>>>> +			goto out_free;
>>>>  
>>>>  		fib6_nh = rt->fib6_nh;
>>>>  
>>>> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>>>>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>>>>  			err = -EINVAL;
>>>> -			goto out;
>>>> +			goto out_free;
>>>>  		}
>>>>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>>>>  		rt->fib6_prefsrc.plen = 128;
>>>> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>  		rt->fib6_prefsrc.plen = 0;
>>>>  
>>>>  	return rt;
>>>> -out:
>>>> -	fib6_info_release(rt);
>>>> -	return ERR_PTR(err);
>>>> +
>>>>  out_free:
>>>>  	ip_fib_metrics_put(rt->fib6_metrics);
>>>> +	if (rt->nh)
>>>> +		nexthop_put(rt->nh);
>>>
>>> Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
>>> called after ip_fib_metrics_init() ?
>>>
>>> Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
>>> and we failed later?
>>
>> similarly I think this cleanup is a separate patch.
>>
> 
> Same thing, fib6_info_destroy_rcu -> fib6_nh_release would double-free the nh_common parts
> if fib6_nh_init fails in the per-cpu allocation after fib_nh_common_init.
> It is not a cleanup, but a result of the fix. If we want to keep it, we'll have to NULL
> the nh_common parts when freeing them in fib_nh_common_release().

exactly. set it to NULL and make the -net patch as simple as possible
Nikolay Aleksandrov Nov. 30, 2021, 9:30 p.m. UTC | #6
On 30/11/2021 19:18, David Ahern wrote:
> On 11/30/21 9:45 AM, Nikolay Aleksandrov wrote:
>> On 30/11/2021 18:01, David Ahern wrote:
>>> On 11/30/21 5:40 AM, Ido Schimmel wrote:
>>>> On Mon, Nov 29, 2021 at 04:11:51PM +0200, Nikolay Aleksandrov wrote:
>>>>> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
>>>>> index 5dbd4b5505eb..a7debafe8b90 100644
>>>>> --- a/net/ipv4/nexthop.c
>>>>> +++ b/net/ipv4/nexthop.c
>>>>> @@ -2565,14 +2565,8 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
>>>>>  	/* sets nh_dev if successful */
>>>>>  	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
>>>>>  				      extack);
>>>>> -	if (err) {
>>>>> -		/* IPv6 is not enabled, don't call fib6_nh_release */
>>>>> -		if (err == -EAFNOSUPPORT)
>>>>> -			goto out;
>>>>> -		ipv6_stub->fib6_nh_release(fib6_nh);
>>>>> -	} else {
>>>>> +	if (!err)
>>>>>  		nh->nh_flags = fib6_nh->fib_nh_flags;
>>>>> -	}
>>>>>  out:
>>>>>  	return err;
>>>>>  }
>>>>
>>>> This hunk looks good
>>>
>>> agreed, but it should be a no-op now so this should be a net-next
>>> cleanup patch.
>>>
>>
>> Actually it is needed, it's not a cleanup or noop. If fib6_nh_init fails after fib_nh_common_init
>> in the per-cpu allocation then fib6_nh->nh_common's pointers will still be there but
>> freed, so it will lead to double free. We have to NULL them when freeing if we want to avoid that.
> 
> fib6_nh_init should do proper cleanup if it hits an error. Your bug fix
> to get nhc_pcpu_rth_output freed should complete that. It can also set
> the value to NULL to avoid double free on any code path.
> 

Indeed, that's another way of achieving the same goal.

> 
>>
>>>>
>>>>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>>>>> index 42d60c76d30a..2107b13cc9ab 100644
>>>>> --- a/net/ipv6/route.c
>>>>> +++ b/net/ipv6/route.c
>>>>> @@ -3635,7 +3635,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
>>>>>  		in6_dev_put(idev);
>>>>>  
>>>>>  	if (err) {
>>>>> -		lwtstate_put(fib6_nh->fib_nh_lws);
>>>>> +		/* check if we failed after fib_nh_common_init() was called */
>>>>> +		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
>>>>> +			fib_nh_common_release(&fib6_nh->nh_common);
>>>>>  		fib6_nh->fib_nh_lws = NULL;
>>>>>  		dev_put(dev);
>>>>>  	}
>>>>
>>>> Likewise
>>>
>>> this is a leak in the current code and should go through -net as a
>>> separate patch.
>>>
>>
>> Yep, this is the point of this patch. :)
>>
>>>>
>>>>> @@ -3822,7 +3824,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>>  	} else {
>>>>>  		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
>>>>>  		if (err)
>>>>> -			goto out;
>>>>> +			goto out_free;
>>>>>  
>>>>>  		fib6_nh = rt->fib6_nh;
>>>>>  
>>>>> @@ -3841,7 +3843,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>>  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
>>>>>  			NL_SET_ERR_MSG(extack, "Invalid source address");
>>>>>  			err = -EINVAL;
>>>>> -			goto out;
>>>>> +			goto out_free;
>>>>>  		}
>>>>>  		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
>>>>>  		rt->fib6_prefsrc.plen = 128;
>>>>> @@ -3849,12 +3851,13 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
>>>>>  		rt->fib6_prefsrc.plen = 0;
>>>>>  
>>>>>  	return rt;
>>>>> -out:
>>>>> -	fib6_info_release(rt);
>>>>> -	return ERR_PTR(err);
>>>>> +
>>>>>  out_free:
>>>>>  	ip_fib_metrics_put(rt->fib6_metrics);
>>>>> +	if (rt->nh)
>>>>> +		nexthop_put(rt->nh);
>>>>
>>>> Shouldn't this be above ip_fib_metrics_put() given nexthop_get() is
>>>> called after ip_fib_metrics_init() ?
>>>>
>>>> Also, shouldn't we call fib6_nh_release() if fib6_nh_init() succeeded
>>>> and we failed later?
>>>
>>> similarly I think this cleanup is a separate patch.
>>>
>>
>> Same thing, fib6_info_destroy_rcu -> fib6_nh_release would double-free the nh_common parts
>> if fib6_nh_init fails in the per-cpu allocation after fib_nh_common_init.
>> It is not a cleanup, but a result of the fix. If we want to keep it, we'll have to NULL
>> the nh_common parts when freeing them in fib_nh_common_release().
> 
> exactly. set it to NULL and make the -net patch as simple as possible
> 

Sure, of course. I'd prefer to make the code consistent w.r.t expectations regardless of
the release cycle, but I don't have a strong preference. I'll post the fix with
NULLing the nh_common pointers.

Cheers,
 Nik
diff mbox series

Patch

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5dbd4b5505eb..a7debafe8b90 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2565,14 +2565,8 @@  static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
 	/* sets nh_dev if successful */
 	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
 				      extack);
-	if (err) {
-		/* IPv6 is not enabled, don't call fib6_nh_release */
-		if (err == -EAFNOSUPPORT)
-			goto out;
-		ipv6_stub->fib6_nh_release(fib6_nh);
-	} else {
+	if (!err)
 		nh->nh_flags = fib6_nh->fib_nh_flags;
-	}
 out:
 	return err;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 42d60c76d30a..2107b13cc9ab 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3635,7 +3635,9 @@  int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 		in6_dev_put(idev);
 
 	if (err) {
-		lwtstate_put(fib6_nh->fib_nh_lws);
+		/* check if we failed after fib_nh_common_init() was called */
+		if (fib6_nh->nh_common.nhc_pcpu_rth_output)
+			fib_nh_common_release(&fib6_nh->nh_common);
 		fib6_nh->fib_nh_lws = NULL;
 		dev_put(dev);
 	}
@@ -3822,7 +3824,7 @@  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 	} else {
 		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
 		if (err)
-			goto out;
+			goto out_free;
 
 		fib6_nh = rt->fib6_nh;
 
@@ -3841,7 +3843,7 @@  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
 			NL_SET_ERR_MSG(extack, "Invalid source address");
 			err = -EINVAL;
-			goto out;
+			goto out_free;
 		}
 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
 		rt->fib6_prefsrc.plen = 128;
@@ -3849,12 +3851,13 @@  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 		rt->fib6_prefsrc.plen = 0;
 
 	return rt;
-out:
-	fib6_info_release(rt);
-	return ERR_PTR(err);
+
 out_free:
 	ip_fib_metrics_put(rt->fib6_metrics);
+	if (rt->nh)
+		nexthop_put(rt->nh);
 	kfree(rt);
+out:
 	return ERR_PTR(err);
 }