diff mbox series

[net-next,v2,01/24] net: resolve forwarding path from virtual netdevice and HW destination address

Message ID 20210324013055.5619-2-pablo@netfilter.org (mailing list archive)
State Accepted
Commit ddb94eafab8b597b05904c8277194ea2d6357fa9
Delegated to: Netdev Maintainers
Headers show
Series netfilter: flowtable enhancements | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 8 maintainers not CCed: daniel@iogearbox.net cong.wang@bytedance.com andriin@fb.com ast@kernel.org ap420073@gmail.com edumazet@google.com atenart@kernel.org weiwan@google.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 6713 this patch: 6713
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail CHECK: Alignment should match open parenthesis ERROR: code indent should use tabs where possible WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: please, no spaces at the start of a line
netdev/build_allmodconfig_warn success Errors and warnings before: 6927 this patch: 6927
netdev/header_inline success Link

Commit Message

Pablo Neira Ayuso March 24, 2021, 1:30 a.m. UTC
This patch adds dev_fill_forward_path() which resolves the path to reach
the real netdevice from the IP forwarding side. This function takes as
input the netdevice and the destination hardware address and it walks
down the devices calling .ndo_fill_forward_path() for each device until
the real device is found.

For instance, assuming the following topology:

               IP forwarding
              /             \
           br0              eth0
           / \
       eth1  eth2
        .
        .
        .
       ethX
 ab:cd:ef:ab:cd:ef

where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
ethX is the interface in another box which is connected to the eth1
bridge port.

For packets going through IP forwarding to br0 whose destination MAC
address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
following path:

	br0 -> eth1

.ndo_fill_forward_path for br0 looks up at the FDB for the bridge port
from the destination MAC address to get the bridge port eth1.

This information allows to create a fast path that bypasses the classic
bridge and IP forwarding paths, so packets go directly from the bridge
port eth1 to eth0 (wan interface) and vice versa.

             fast path
      .------------------------.
     /                          \
    |           IP forwarding   |
    |          /             \  \/
    |       br0               eth0
    .       / \
     -> eth1  eth2
        .
        .
        .
       ethX
 ab:cd:ef:ab:cd:ef

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v2: no changes.

 include/linux/netdevice.h | 27 +++++++++++++++++++++++
 net/core/dev.c            | 46 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

Comments

Qingfang Deng March 24, 2021, 7:27 a.m. UTC | #1
On Wed, Mar 24, 2021 at 02:30:32AM +0100, Pablo Neira Ayuso wrote:
> This patch adds dev_fill_forward_path() which resolves the path to reach
> the real netdevice from the IP forwarding side. This function takes as
> input the netdevice and the destination hardware address and it walks
> down the devices calling .ndo_fill_forward_path() for each device until
> the real device is found.
> 
> For instance, assuming the following topology:
> 
>                IP forwarding
>               /             \
>            br0              eth0
>            / \
>        eth1  eth2
>         .
>         .
>         .
>        ethX
>  ab:cd:ef:ab:cd:ef
> 
> where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
> ethX is the interface in another box which is connected to the eth1
> bridge port.
> 
> For packets going through IP forwarding to br0 whose destination MAC
> address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
> following path:
> 
> 	br0 -> eth1
> 
> .ndo_fill_forward_path for br0 looks up at the FDB for the bridge port
> from the destination MAC address to get the bridge port eth1.
> 
> This information allows to create a fast path that bypasses the classic
> bridge and IP forwarding paths, so packets go directly from the bridge
> port eth1 to eth0 (wan interface) and vice versa.
> 
>              fast path
>       .------------------------.
>      /                          \
>     |           IP forwarding   |
>     |          /             \  \/
>     |       br0               eth0
>     .       / \
>      -> eth1  eth2
>         .
>         .
>         .
>        ethX
>  ab:cd:ef:ab:cd:ef

Have you tested if roaming breaks existing TCP/UDP connections?
For example, eth1 and eth2 are connected to 2 WiFi APs, and the
client ab:cd:ef:ab:cd:ef roams between these APs.

> 
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Pablo Neira Ayuso March 24, 2021, 10:03 a.m. UTC | #2
On Wed, Mar 24, 2021 at 03:27:11PM +0800, DENG Qingfang wrote:
> On Wed, Mar 24, 2021 at 02:30:32AM +0100, Pablo Neira Ayuso wrote:
> > This patch adds dev_fill_forward_path() which resolves the path to reach
> > the real netdevice from the IP forwarding side. This function takes as
> > input the netdevice and the destination hardware address and it walks
> > down the devices calling .ndo_fill_forward_path() for each device until
> > the real device is found.
> > 
> > For instance, assuming the following topology:
> > 
> >                IP forwarding
> >               /             \
> >            br0              eth0
> >            / \
> >        eth1  eth2
> >         .
> >         .
> >         .
> >        ethX
> >  ab:cd:ef:ab:cd:ef
> > 
> > where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
> > ethX is the interface in another box which is connected to the eth1
> > bridge port.
> > 
> > For packets going through IP forwarding to br0 whose destination MAC
> > address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
> > following path:
> > 
> > 	br0 -> eth1
> > 
> > .ndo_fill_forward_path for br0 looks up at the FDB for the bridge port
> > from the destination MAC address to get the bridge port eth1.
> > 
> > This information allows to create a fast path that bypasses the classic
> > bridge and IP forwarding paths, so packets go directly from the bridge
> > port eth1 to eth0 (wan interface) and vice versa.
> > 
> >              fast path
> >       .------------------------.
> >      /                          \
> >     |           IP forwarding   |
> >     |          /             \  \/
> >     |       br0               eth0
> >     .       / \
> >      -> eth1  eth2
> >         .
> >         .
> >         .
> >        ethX
> >  ab:cd:ef:ab:cd:ef
> 
> Have you tested if roaming breaks existing TCP/UDP connections?
> For example, eth1 and eth2 are connected to 2 WiFi APs, and the
> client ab:cd:ef:ab:cd:ef roams between these APs.

For this scenario specifically, it should be possible extend the
existing flowtable netlink API to allow hostapd to flush entries in
the flowtable for the client changing AP.
Qingfang Deng March 24, 2021, 4:07 p.m. UTC | #3
On Wed, Mar 24, 2021 at 11:03:54AM +0100, Pablo Neira Ayuso wrote:
> 
> For this scenario specifically, it should be possible extend the
> existing flowtable netlink API to allow hostapd to flush entries in
> the flowtable for the client changing AP.

The APs are external, are we going to install hostapd to them, and
let them inform the gateway? They may not even run Linux.
Roaming can happen in a wired LAN too, see Vladimir's commit message
90dc8fd36078 ("net: bridge: notify switchdev of disappearance of old FDB entry upon migration").

I think the fastpath should monitor roaming (called "FDB migration" in
that commit) events, and update/flush the flowtable accordingly.
Pablo Neira Ayuso March 24, 2021, 7:15 p.m. UTC | #4
Hi,

On Thu, Mar 25, 2021 at 12:07:02AM +0800, DENG Qingfang wrote:
> On Wed, Mar 24, 2021 at 11:03:54AM +0100, Pablo Neira Ayuso wrote:
> > 
> > For this scenario specifically, it should be possible extend the
> > existing flowtable netlink API to allow hostapd to flush entries in
> > the flowtable for the client changing AP.
> 
> The APs are external, are we going to install hostapd to them, and
> let them inform the gateway? They may not even run Linux.

This falls within the scenario that Jakub described already. These
limitations are described in patch 24/24.

> Roaming can happen in a wired LAN too, see Vladimir's commit message
> 90dc8fd36078 ("net: bridge: notify switchdev of disappearance of old FDB entry upon migration").
> I think the fastpath should monitor roaming (called "FDB migration" in
> that commit) events, and update/flush the flowtable accordingly.

Why does this need to happen in this batch? Is there anything that
makes you think dealing with roaming cannot be done incrementally on
top of this series?

This patchset is already useful for a good number of use-cases
regardless the roaming scenario you describe.

FDB hardware offload was added years ago and the roaming mitigation
that you refer to was added just a few months ago.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7005ad80e8d1..f9ac960699a4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -848,6 +848,27 @@  typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct net_device *sb_dev);
 
+enum net_device_path_type {
+	DEV_PATH_ETHERNET = 0,
+};
+
+struct net_device_path {
+	enum net_device_path_type	type;
+	const struct net_device		*dev;
+};
+
+#define NET_DEVICE_PATH_STACK_MAX	5
+
+struct net_device_path_stack {
+	int			num_paths;
+	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
+};
+
+struct net_device_path_ctx {
+	const struct net_device *dev;
+	const u8		*daddr;
+};
+
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
 	TC_SETUP_CLSU32,
@@ -1282,6 +1303,8 @@  struct netdev_net_notifier {
  * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
  *	If a device is paired with a peer device, return the peer instance.
  *	The caller must be under RCU read context.
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+ *     Get the forwarding path to reach the real device from the HW destination address
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1488,6 +1511,8 @@  struct net_device_ops {
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
 	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
+	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+                                                         struct net_device_path *path);
 };
 
 /**
@@ -2870,6 +2895,8 @@  void dev_remove_offload(struct packet_offload *po);
 
 int dev_get_iflink(const struct net_device *dev);
 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack);
 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index c9a496f5e687..8a03f71aecac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -848,6 +848,52 @@  int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+	int k = stack->num_paths++;
+
+	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+		return NULL;
+
+	return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack)
+{
+	const struct net_device *last_dev;
+	struct net_device_path_ctx ctx = {
+		.dev	= dev,
+		.daddr	= daddr,
+	};
+	struct net_device_path *path;
+	int ret = 0;
+
+	stack->num_paths = 0;
+	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+		last_dev = ctx.dev;
+		path = dev_fwd_path(stack);
+		if (!path)
+			return -1;
+
+		memset(path, 0, sizeof(struct net_device_path));
+		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+		if (ret < 0)
+			return -1;
+
+		if (WARN_ON_ONCE(last_dev == ctx.dev))
+			return -1;
+	}
+	path = dev_fwd_path(stack);
+	if (!path)
+		return -1;
+	path->type = DEV_PATH_ETHERNET;
+	path->dev = ctx.dev;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace