diff mbox series

vrf/mcast: Fix mcast routing when using vrf.

Message ID 20220311172509.10992-1-greearb@candelatech.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series vrf/mcast: Fix mcast routing when using vrf. | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/cc_maintainers fail 4 maintainers not CCed: davem@davemloft.net yoshfuji@linux-ipv6.org kuba@kernel.org dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ben Greear March 11, 2022, 5:25 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

The problem case is a VRF with 2 ports.  Assume eth1 points towards
upstream and has the default gateway, and eth2 points towards a local
subnet.

A user binds a UDP multicast socket to eth2 and attempts to send
multicast traffic to a non-local multicast address (anything outside
224.0.0.0/24).  There is no multicast router daemon in this case.

The flow through the kernel will start out with the correct oif (eth2),
but the following logic replaces the oif with the index of the vrf device:

    /* update flow if oif or iif point to device enslaved to l3mdev */
    l3mdev_update_flow(net, flowi4_to_flowi(flp));

After that, the mcast routing logic will choose the eth1 interface
as the output device, and the code in __mkroute_output will in the
end cause the frame to be sent out eth1 instead of the desired eth2.

To fix this, add special case logic to detect this in __mkroute_output
and instead use the user-specified port as the output device when
there is no specific mcast route available.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 net/ipv4/route.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

Comments

David Ahern March 12, 2022, 12:54 a.m. UTC | #1
On 3/11/22 10:25 AM, greearb@candelatech.com wrote:
> @@ -2516,12 +2517,26 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
>  			flags &= ~RTCF_LOCAL;
>  		else
>  			do_cache = false;
> -		/* If multicast route do not exist use
> -		 * default one, but do not gateway in this case.
> +		/* If multicast route does not exist use
> +		 * default one, but do not use gateway in this case.
>  		 * Yes, it is hack.

I think I have a better way to do this. Let's see how the testing goes.
diff mbox series

Patch

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f33ad1f383b6..722df8fcf417 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2475,6 +2475,7 @@  int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 /* called with rcu_read_lock() */
 static struct rtable *__mkroute_output(const struct fib_result *res,
 				       const struct flowi4 *fl4, int orig_oif,
+				       struct net_device *orig_dev_out,
 				       struct net_device *dev_out,
 				       unsigned int flags)
 {
@@ -2516,12 +2517,26 @@  static struct rtable *__mkroute_output(const struct fib_result *res,
 			flags &= ~RTCF_LOCAL;
 		else
 			do_cache = false;
-		/* If multicast route do not exist use
-		 * default one, but do not gateway in this case.
+		/* If multicast route does not exist use
+		 * default one, but do not use gateway in this case.
 		 * Yes, it is hack.
 		 */
-		if (fi && res->prefixlen < 4)
+		if (fi && res->prefixlen < 4) {
+			struct net *net = dev_net(dev_out);
+
 			fi = NULL;
+
+			if (orig_oif && orig_dev_out &&
+			    dev_out->ifindex != orig_oif &&
+			    netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+				/* vrf overwrites the original flowi4_oif for
+				 * member network devices.  In that case,
+				 * lets use the user-specified oif instead of
+				 * a default route.
+				 */
+				dev_out = orig_dev_out;
+			}
+		}
 	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
 		   (orig_oif != dev_out->ifindex)) {
 		/* For local routes that require a particular output interface
@@ -2630,6 +2645,7 @@  struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 					    const struct sk_buff *skb)
 {
 	struct net_device *dev_out = NULL;
+	struct net_device *orig_dev_out = NULL;
 	int orig_oif = fl4->flowi4_oif;
 	unsigned int flags = 0;
 	struct rtable *rth;
@@ -2785,12 +2801,13 @@  struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		goto make_route;
 	}
 
+	orig_dev_out = dev_out;
 	fib_select_path(net, res, fl4, skb);
 
 	dev_out = FIB_RES_DEV(*res);
 
 make_route:
-	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
+	rth = __mkroute_output(res, fl4, orig_oif, orig_dev_out, dev_out, flags);
 
 out:
 	return rth;