diff mbox series

[net-next,3/4] nexthop: Do not return invalid nexthop object during multipath selection

Message ID 20230529201914.69828-4-bpoirier@nvidia.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series nexthop: Refactor and fix nexthop selection for multipath routes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 11 this patch: 11
netdev/cc_maintainers warning 4 maintainers not CCed: kuba@kernel.org edumazet@google.com davem@davemloft.net pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 11 this patch: 11
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 38 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Benjamin Poirier May 29, 2023, 8:19 p.m. UTC
With legacy nexthops, when net.ipv4.fib_multipath_use_neigh is set,
fib_select_multipath() will never set res->nhc to a nexthop that is not
good (as per fib_good_nh()). On the other hand, with nexthop objects,
nexthop_select_path_hthr() may return a nexthop that failed the
nexthop_is_good_nh() test even if there was one that passed. Refactor
nexthop_select_path_hthr() to follow a selection logic more similar to
fib_select_multipath().

The issue can be demonstrated with the following sequence of commands. The
first block shows that things work as expected with legacy nexthops. In the
second block, the last sequence of `ip rou get` commands shows the problem
case: some routes still use the .2 nexthop even though its neighbor entry
was marked as failed.

sysctl net.ipv4.fib_multipath_use_neigh=1
ip link add dummy1 up type dummy
ip rou add 198.51.100.0/24 nexthop via 192.0.2.1 dev dummy1 onlink nexthop via 192.0.2.2 dev dummy1 onlink
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip neigh add 192.0.2.1 dev dummy1 nud failed
echo ".1 failed:"  # results should not use .1
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip neigh del 192.0.2.1 dev dummy1
ip neigh add 192.0.2.2 dev dummy1 nud failed
echo ".2 failed:"  # results should not use .2
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip link del dummy1

ip link add dummy1 up type dummy
ip nexthop add id 1 via 192.0.2.1 dev dummy1 onlink
ip nexthop add id 2 via 192.0.2.2 dev dummy1 onlink
ip nexthop add id 1001 group 1/2
ip rou add 198.51.100.0/24 nhid 1001
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip neigh add 192.0.2.1 dev dummy1 nud failed
echo ".1 failed:"  # results should not use .1
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip neigh del 192.0.2.1 dev dummy1
ip neigh add 192.0.2.2 dev dummy1 nud failed
echo ".2 failed:"  # results should not use .2
for i in {10..19}; do ip -o rou get 198.51.100.$i; done
ip link del dummy1

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
---
 net/ipv4/nexthop.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

Comments

David Ahern May 30, 2023, 3:08 p.m. UTC | #1
On 5/29/23 2:19 PM, Benjamin Poirier wrote:
> diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
> index c12acbf39659..ca501ced04fb 100644
> --- a/net/ipv4/nexthop.c
> +++ b/net/ipv4/nexthop.c
> @@ -1186,6 +1186,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
>  static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
>  {
>  	struct nexthop *rc = NULL;
> +	bool first = false;
>  	int i;
>  
>  	if (nhg->fdb_nh)
> @@ -1194,20 +1195,24 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
>  	for (i = 0; i < nhg->num_nh; ++i) {
>  		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
>  
> -		if (hash > atomic_read(&nhge->hthr.upper_bound))
> -			continue;
> -
>  		/* nexthops always check if it is good and does
>  		 * not rely on a sysctl for this behavior
>  		 */
> -		if (nexthop_is_good_nh(nhge->nh))
> -			return nhge->nh;
> +		if (!nexthop_is_good_nh(nhge->nh))
> +			continue;
>  
> -		if (!rc)
> +		if (!first) {

Setting 'first' and 'rc' are equivalent, so 'first' is not needed. As I
recall it was used in fib_select_multipath before the nexthop
refactoring (eba618abacade) because nhsel == 0 is valid, so the loop
could not rely on it.



>  			rc = nhge->nh;
> +			first = true;
> +		}
> +
> +		if (hash > atomic_read(&nhge->hthr.upper_bound))
> +			continue;
> +
> +		return nhge->nh;
>  	}
>  
> -	return rc;
> +	return rc ? : nhg->nh_entries[0].nh;
>  }
>  
>  static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
Ido Schimmel May 31, 2023, 6:04 a.m. UTC | #2
On Tue, May 30, 2023 at 09:08:01AM -0600, David Ahern wrote:
> On 5/29/23 2:19 PM, Benjamin Poirier wrote:
> > diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
> > index c12acbf39659..ca501ced04fb 100644
> > --- a/net/ipv4/nexthop.c
> > +++ b/net/ipv4/nexthop.c
> > @@ -1186,6 +1186,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
> >  static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
> >  {
> >  	struct nexthop *rc = NULL;
> > +	bool first = false;
> >  	int i;
> >  
> >  	if (nhg->fdb_nh)
> > @@ -1194,20 +1195,24 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
> >  	for (i = 0; i < nhg->num_nh; ++i) {
> >  		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
> >  
> > -		if (hash > atomic_read(&nhge->hthr.upper_bound))
> > -			continue;
> > -
> >  		/* nexthops always check if it is good and does
> >  		 * not rely on a sysctl for this behavior
> >  		 */
> > -		if (nexthop_is_good_nh(nhge->nh))
> > -			return nhge->nh;
> > +		if (!nexthop_is_good_nh(nhge->nh))
> > +			continue;
> >  
> > -		if (!rc)
> > +		if (!first) {
> 
> Setting 'first' and 'rc' are equivalent, so 'first' is not needed.

Yea, looking at it again not sure what I was thinking...

Thanks for the review!

> As I recall it was used in fib_select_multipath before the nexthop
> refactoring (eba618abacade) because nhsel == 0 is valid, so the loop
> could not rely on it.
> 
> 
> 
> >  			rc = nhge->nh;
> > +			first = true;
> > +		}
> > +
> > +		if (hash > atomic_read(&nhge->hthr.upper_bound))
> > +			continue;
> > +
> > +		return nhge->nh;
> >  	}
> >  
> > -	return rc;
> > +	return rc ? : nhg->nh_entries[0].nh;
> >  }
> >  
> >  static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
> 
>
diff mbox series

Patch

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index c12acbf39659..ca501ced04fb 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1186,6 +1186,7 @@  static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
 static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
 {
 	struct nexthop *rc = NULL;
+	bool first = false;
 	int i;
 
 	if (nhg->fdb_nh)
@@ -1194,20 +1195,24 @@  static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
 	for (i = 0; i < nhg->num_nh; ++i) {
 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 
-		if (hash > atomic_read(&nhge->hthr.upper_bound))
-			continue;
-
 		/* nexthops always check if it is good and does
 		 * not rely on a sysctl for this behavior
 		 */
-		if (nexthop_is_good_nh(nhge->nh))
-			return nhge->nh;
+		if (!nexthop_is_good_nh(nhge->nh))
+			continue;
 
-		if (!rc)
+		if (!first) {
 			rc = nhge->nh;
+			first = true;
+		}
+
+		if (hash > atomic_read(&nhge->hthr.upper_bound))
+			continue;
+
+		return nhge->nh;
 	}
 
-	return rc;
+	return rc ? : nhg->nh_entries[0].nh;
 }
 
 static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)