diff mbox series

[net-next,v2,3/4] net: dsa: add support for offloading HSR

Message ID 20210204215926.64377-4-george.mccollister@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series add HSR offloading support for DSA switches | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 2 maintainers not CCed: linux@armlinux.org.uk davem@davemloft.net
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail ERROR: Macros with complex values should be enclosed in parentheses
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

George McCollister Feb. 4, 2021, 9:59 p.m. UTC
Add support for offloading of HSR/PRP (IEC 62439-3) tag insertion
tag removal, duplicate generation and forwarding on DSA switches.

Add DSA_NOTIFIER_HSR_JOIN and DSA_NOTIFIER_HSR_LEAVE which trigger calls
to .port_hsr_join and .port_hsr_leave in the DSA driver for the switch.

The DSA switch driver should then set netdev feature flags for the
HSR/PRP operation that it offloads.
    NETIF_F_HW_HSR_TAG_INS
    NETIF_F_HW_HSR_TAG_RM
    NETIF_F_HW_HSR_FWD
    NETIF_F_HW_HSR_DUP

Signed-off-by: George McCollister <george.mccollister@gmail.com>
---
 include/net/dsa.h  | 13 +++++++++++++
 net/dsa/dsa_priv.h | 11 +++++++++++
 net/dsa/port.c     | 34 ++++++++++++++++++++++++++++++++++
 net/dsa/slave.c    | 14 ++++++++++++++
 net/dsa/switch.c   | 24 ++++++++++++++++++++++++
 5 files changed, 96 insertions(+)

Comments

Vladimir Oltean Feb. 6, 2021, 11:29 p.m. UTC | #1
On Thu, Feb 04, 2021 at 03:59:25PM -0600, George McCollister wrote:
> @@ -1935,6 +1936,19 @@ static int dsa_slave_changeupper(struct net_device *dev,
>  			dsa_port_lag_leave(dp, info->upper_dev);
>  			err = NOTIFY_OK;
>  		}
> +	} else if (is_hsr_master(info->upper_dev)) {
> +		if (info->linking) {
> +			err = dsa_port_hsr_join(dp, info->upper_dev);
> +			if (err == -EOPNOTSUPP) {
> +				NL_SET_ERR_MSG_MOD(info->info.extack,
> +						   "Offloading not supported");
> +				err = 0;
> +			}
> +			err = notifier_from_errno(err);
> +		} else {
> +			dsa_port_hsr_leave(dp, info->upper_dev);
> +			err = NOTIFY_OK;
> +		}
>  	}
[..]
> +static int dsa_switch_hsr_join(struct dsa_switch *ds,
> +			       struct dsa_notifier_hsr_info *info)
> +{
> +	if (ds->index == info->sw_index && ds->ops->port_hsr_join)
> +		return ds->ops->port_hsr_join(ds, info->port, info->hsr);
> +
> +	return 0;
> +}
> +
> +static int dsa_switch_hsr_leave(struct dsa_switch *ds,
> +				struct dsa_notifier_hsr_info *info)
> +{
> +	if (ds->index == info->sw_index && ds->ops->port_hsr_leave)
> +		ds->ops->port_hsr_leave(ds, info->port, info->hsr);
> +
> +	return 0;
> +}
> +

If you return zero, the software fallback is never going to kick in.
George McCollister Feb. 8, 2021, 5:21 p.m. UTC | #2
On Sat, Feb 6, 2021 at 5:29 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Thu, Feb 04, 2021 at 03:59:25PM -0600, George McCollister wrote:
> > @@ -1935,6 +1936,19 @@ static int dsa_slave_changeupper(struct net_device *dev,
> >                       dsa_port_lag_leave(dp, info->upper_dev);
> >                       err = NOTIFY_OK;
> >               }
> > +     } else if (is_hsr_master(info->upper_dev)) {
> > +             if (info->linking) {
> > +                     err = dsa_port_hsr_join(dp, info->upper_dev);
> > +                     if (err == -EOPNOTSUPP) {
> > +                             NL_SET_ERR_MSG_MOD(info->info.extack,
> > +                                                "Offloading not supported");
> > +                             err = 0;
> > +                     }
> > +                     err = notifier_from_errno(err);
> > +             } else {
> > +                     dsa_port_hsr_leave(dp, info->upper_dev);
> > +                     err = NOTIFY_OK;
> > +             }
> >       }
> [..]
> > +static int dsa_switch_hsr_join(struct dsa_switch *ds,
> > +                            struct dsa_notifier_hsr_info *info)
> > +{
> > +     if (ds->index == info->sw_index && ds->ops->port_hsr_join)
> > +             return ds->ops->port_hsr_join(ds, info->port, info->hsr);
> > +
> > +     return 0;
> > +}
> > +
> > +static int dsa_switch_hsr_leave(struct dsa_switch *ds,
> > +                             struct dsa_notifier_hsr_info *info)
> > +{
> > +     if (ds->index == info->sw_index && ds->ops->port_hsr_leave)
> > +             ds->ops->port_hsr_leave(ds, info->port, info->hsr);
> > +
> > +     return 0;
> > +}
> > +
>
> If you return zero, the software fallback is never going to kick in.

For join and leave? How is this not a problem for the bridge and lag
functions? They work the same way don't they? I figured it would be
safe to follow what they were doing.
Vladimir Oltean Feb. 9, 2021, 5:20 p.m. UTC | #3
On Mon, Feb 08, 2021 at 11:21:26AM -0600, George McCollister wrote:
> > If you return zero, the software fallback is never going to kick in.
>
> For join and leave? How is this not a problem for the bridge and lag
> functions? They work the same way don't they? I figured it would be
> safe to follow what they were doing.

I didn't say that the bridge and LAG offloading logic does the right
thing, but it is on its way there...

Those "XXX not offloaded" messages were tested with cases where the
.port_lag_join callback _is_ present, but fails (due to things like
incompatible xmit hashing policy). They were not tested with the case
where the driver does not implement .port_lag_join at all.

Doesn't mean you shouldn't do the right thing. I'll send some patches
soon, hopefully, fixing that for LAG and the bridge, you can concentrate
on HSR. For the non-offload scenario where the port is basically
standalone, we also need to disable the other bridge functions such as
address learning, otherwise it won't work properly, and that's where
I've been focusing my attention lately. You can't offload the bridge in
software, or a LAG, if you have address learning enabled. For HSR it's
even more interesting, you need to have address learning disabled even
when you offload the DANH/DANP.
George McCollister Feb. 9, 2021, 6:37 p.m. UTC | #4
On Tue, Feb 9, 2021 at 11:20 AM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Mon, Feb 08, 2021 at 11:21:26AM -0600, George McCollister wrote:
> > > If you return zero, the software fallback is never going to kick in.
> >
> > For join and leave? How is this not a problem for the bridge and lag
> > functions? They work the same way don't they? I figured it would be
> > safe to follow what they were doing.
>
> I didn't say that the bridge and LAG offloading logic does the right
> thing, but it is on its way there...
>
> Those "XXX not offloaded" messages were tested with cases where the
> .port_lag_join callback _is_ present, but fails (due to things like
> incompatible xmit hashing policy). They were not tested with the case
> where the driver does not implement .port_lag_join at all.
>
> Doesn't mean you shouldn't do the right thing. I'll send some patches
> soon, hopefully, fixing that for LAG and the bridge, you can concentrate
> on HSR. For the non-offload scenario where the port is basically
> standalone, we also need to disable the other bridge functions such as
> address learning, otherwise it won't work properly, and that's where
> I've been focusing my attention lately. You can't offload the bridge in
> software, or a LAG, if you have address learning enabled. For HSR it's
> even more interesting, you need to have address learning disabled even
> when you offload the DANH/DANP.

Do I just return -EOPNOTSUPP instead of 0 in dsa_switch_hsr_join and
dsa_switch_hsr_leave?

I'm not sure exactly what you're saying needs to be done wrt to
address learning with HSR. The switch does address learning
internally. Are you saying the DSA address learning needs to be
disabled? If that's something I need for this patch some tips on what
to do would be appreciated because I'm a bit lost.

Thanks
Vladimir Oltean Feb. 9, 2021, 6:51 p.m. UTC | #5
On Tue, Feb 09, 2021 at 12:37:38PM -0600, George McCollister wrote:
> On Tue, Feb 9, 2021 at 11:20 AM Vladimir Oltean <olteanv@gmail.com> wrote:
> >
> > On Mon, Feb 08, 2021 at 11:21:26AM -0600, George McCollister wrote:
> > > > If you return zero, the software fallback is never going to kick in.
> > >
> > > For join and leave? How is this not a problem for the bridge and lag
> > > functions? They work the same way don't they? I figured it would be
> > > safe to follow what they were doing.
> >
> > I didn't say that the bridge and LAG offloading logic does the right
> > thing, but it is on its way there...
> >
> > Those "XXX not offloaded" messages were tested with cases where the
> > .port_lag_join callback _is_ present, but fails (due to things like
> > incompatible xmit hashing policy). They were not tested with the case
> > where the driver does not implement .port_lag_join at all.
> >
> > Doesn't mean you shouldn't do the right thing. I'll send some patches
> > soon, hopefully, fixing that for LAG and the bridge, you can concentrate
> > on HSR. For the non-offload scenario where the port is basically
> > standalone, we also need to disable the other bridge functions such as
> > address learning, otherwise it won't work properly, and that's where
> > I've been focusing my attention lately. You can't offload the bridge in
> > software, or a LAG, if you have address learning enabled. For HSR it's
> > even more interesting, you need to have address learning disabled even
> > when you offload the DANH/DANP.
> 
> Do I just return -EOPNOTSUPP instead of 0 in dsa_switch_hsr_join and
> dsa_switch_hsr_leave?

Yes, return -EOPNOTSUPP if the callbacks are not implemented please.

> I'm not sure exactly what you're saying needs to be done wrt to
> address learning with HSR. The switch does address learning
> internally. Are you saying the DSA address learning needs to be
> disabled?

I'm saying that when you're doing any sort of redundancy protocol, the
switch will get confused by address learning if it performs it at
physical port level, because it will see the same source MAC address
coming in from 2 (or more) different physical ports. And when it sees a
packet coming in through a port that it had already learned it should be
the destination for the MAC address because an earlier packet came
having that MAC address as source, it will think it should do
hairpinning which it's configured not to => it'll drop the packet.

Now, your switch might have some sort of concept of address learning at
logical port level, where the "logical port" would roughly correspond to
the hsr0 upper (I haven't opened the XRS700x manual to know if it does
this, sorry). Basically if you support RedBox I expect that the switch
is able to learn at the level of "this MAC address came from the HSR
ring, aka from one or both of the individual ring ports". But for
configuring that in Linux, you'd need something like:

ip link set hsr0 master br0
ip link set hsr0 type bridge_slave learning on

and then catch from DSA the switchdev notification emitted for hsr0, and
use that to configure learning on the logical port of your switch
corresponding to hsr0, instead of learning on the physical ports that
offload it.

There are similar issues related to address learning for everything
except a bridge port, basically.

> If that's something I need for this patch some tips on what
> to do would be appreciated because I'm a bit lost.

I didn't say you need to change something related to learning for this
series, because you can't anyway - DSA doesn't give you the knobs to
configure address learning yet. The series where I try to add those are
here:
https://patchwork.kernel.org/project/netdevbpf/cover/20210209151936.97382-1-olteanv@gmail.com/

The take-away is that with those changes, a DSA driver should start its
ports in standalone mode with learning disabled and flooding of all
kinds enabled, and then treat the .port_bridge_flags callback which
should do the right thing and enable/disable address learning only when
necessary.

All I said is that address learning remaining enabled has been an issue
that prevented the non-offload scenarios from really working, but you
shouldn't be too hung up on that, and still return -EOPNOTSUPP, thereby
allowing the software fallback to kick in, even if it doesn't work.
George McCollister Feb. 9, 2021, 7:09 p.m. UTC | #6
On Tue, Feb 9, 2021 at 12:51 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Tue, Feb 09, 2021 at 12:37:38PM -0600, George McCollister wrote:
> > On Tue, Feb 9, 2021 at 11:20 AM Vladimir Oltean <olteanv@gmail.com> wrote:
> > >
> > > On Mon, Feb 08, 2021 at 11:21:26AM -0600, George McCollister wrote:
> > > > > If you return zero, the software fallback is never going to kick in.
> > > >
> > > > For join and leave? How is this not a problem for the bridge and lag
> > > > functions? They work the same way don't they? I figured it would be
> > > > safe to follow what they were doing.
> > >
> > > I didn't say that the bridge and LAG offloading logic does the right
> > > thing, but it is on its way there...
> > >
> > > Those "XXX not offloaded" messages were tested with cases where the
> > > .port_lag_join callback _is_ present, but fails (due to things like
> > > incompatible xmit hashing policy). They were not tested with the case
> > > where the driver does not implement .port_lag_join at all.
> > >
> > > Doesn't mean you shouldn't do the right thing. I'll send some patches
> > > soon, hopefully, fixing that for LAG and the bridge, you can concentrate
> > > on HSR. For the non-offload scenario where the port is basically
> > > standalone, we also need to disable the other bridge functions such as
> > > address learning, otherwise it won't work properly, and that's where
> > > I've been focusing my attention lately. You can't offload the bridge in
> > > software, or a LAG, if you have address learning enabled. For HSR it's
> > > even more interesting, you need to have address learning disabled even
> > > when you offload the DANH/DANP.
> >
> > Do I just return -EOPNOTSUPP instead of 0 in dsa_switch_hsr_join and
> > dsa_switch_hsr_leave?
>
> Yes, return -EOPNOTSUPP if the callbacks are not implemented please.

Will do.

>
> > I'm not sure exactly what you're saying needs to be done wrt to
> > address learning with HSR. The switch does address learning
> > internally. Are you saying the DSA address learning needs to be
> > disabled?
>
> I'm saying that when you're doing any sort of redundancy protocol, the
> switch will get confused by address learning if it performs it at
> physical port level, because it will see the same source MAC address
> coming in from 2 (or more) different physical ports. And when it sees a
> packet coming in through a port that it had already learned it should be
> the destination for the MAC address because an earlier packet came
> having that MAC address as source, it will think it should do
> hairpinning which it's configured not to => it'll drop the packet.
>
> Now, your switch might have some sort of concept of address learning at
> logical port level, where the "logical port" would roughly correspond to
> the hsr0 upper (I haven't opened the XRS700x manual to know if it does
> this, sorry). Basically if you support RedBox I expect that the switch
> is able to learn at the level of "this MAC address came from the HSR
> ring, aka from one or both of the individual ring ports". But for
> configuring that in Linux, you'd need something like:

Yup, the switch has provisions to deal with this.

>
> ip link set hsr0 master br0
> ip link set hsr0 type bridge_slave learning on
>
> and then catch from DSA the switchdev notification emitted for hsr0, and
> use that to configure learning on the logical port of your switch
> corresponding to hsr0, instead of learning on the physical ports that
> offload it.
>
> There are similar issues related to address learning for everything
> except a bridge port, basically.
>
> > If that's something I need for this patch some tips on what
> > to do would be appreciated because I'm a bit lost.
>
> I didn't say you need to change something related to learning for this
> series, because you can't anyway - DSA doesn't give you the knobs to
> configure address learning yet. The series where I try to add those are
> here:
> https://patchwork.kernel.org/project/netdevbpf/cover/20210209151936.97382-1-olteanv@gmail.com/
>
> The take-away is that with those changes, a DSA driver should start its
> ports in standalone mode with learning disabled and flooding of all
> kinds enabled, and then treat the .port_bridge_flags callback which
> should do the right thing and enable/disable address learning only when
> necessary.
>
> All I said is that address learning remaining enabled has been an issue
> that prevented the non-offload scenarios from really working, but you
> shouldn't be too hung up on that, and still return -EOPNOTSUPP, thereby
> allowing the software fallback to kick in, even if it doesn't work.

Thanks for clearing that up and explaining how this will work in the future.
diff mbox series

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 60acb9fca124..84875960b706 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -172,6 +172,10 @@  struct dsa_switch_tree {
 	list_for_each_entry((_dp), &(_dst)->ports, list)	\
 		if ((_dp)->lag_dev == (_lag))
 
+#define dsa_hsr_foreach_port(_dp, _ds, _hsr)			\
+	list_for_each_entry((_dp), &(_ds)->dst->ports, list)	\
+		if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr))
+
 static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst,
 					     unsigned int id)
 {
@@ -264,6 +268,7 @@  struct dsa_port {
 	struct phylink_config	pl_config;
 	struct net_device	*lag_dev;
 	bool			lag_tx_enabled;
+	struct net_device	*hsr_dev;
 
 	struct list_head list;
 
@@ -769,6 +774,14 @@  struct dsa_switch_ops {
 				 struct netdev_lag_upper_info *info);
 	int	(*port_lag_leave)(struct dsa_switch *ds, int port,
 				  struct net_device *lag);
+
+	/*
+	 * HSR integration
+	 */
+	int	(*port_hsr_join)(struct dsa_switch *ds, int port,
+				 struct net_device *hsr);
+	void	(*port_hsr_leave)(struct dsa_switch *ds, int port,
+				  struct net_device *hsr);
 };
 
 #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes)		\
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 263593ce94a8..bb41f8bf4f6e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -20,6 +20,8 @@  enum {
 	DSA_NOTIFIER_BRIDGE_LEAVE,
 	DSA_NOTIFIER_FDB_ADD,
 	DSA_NOTIFIER_FDB_DEL,
+	DSA_NOTIFIER_HSR_JOIN,
+	DSA_NOTIFIER_HSR_LEAVE,
 	DSA_NOTIFIER_LAG_CHANGE,
 	DSA_NOTIFIER_LAG_JOIN,
 	DSA_NOTIFIER_LAG_LEAVE,
@@ -100,6 +102,13 @@  struct dsa_switchdev_event_work {
 	u16 vid;
 };
 
+/* DSA_NOTIFIER_HSR_* */
+struct dsa_notifier_hsr_info {
+	struct net_device *hsr;
+	int sw_index;
+	int port;
+};
+
 struct dsa_slave_priv {
 	/* Copy of CPU port xmit for faster access in slave transmit hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
@@ -183,6 +192,8 @@  int dsa_port_vlan_del(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan);
 int dsa_port_link_register_of(struct dsa_port *dp);
 void dsa_port_link_unregister_of(struct dsa_port *dp);
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
+void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
 extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
 static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 5e079a61528e..b93bda463026 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -868,3 +868,37 @@  int dsa_port_get_phy_sset_count(struct dsa_port *dp)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
+
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
+{
+	struct dsa_notifier_hsr_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.hsr = hsr,
+	};
+	int err;
+
+	dp->hsr_dev = hsr;
+
+	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info);
+	if (err)
+		dp->hsr_dev = NULL;
+
+	return err;
+}
+
+void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
+{
+	struct dsa_notifier_hsr_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.hsr = hsr,
+	};
+	int err;
+
+	dp->hsr_dev = NULL;
+
+	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
+	if (err)
+		pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b0571ab4e5a7..11d01276f11d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,6 +17,7 @@ 
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
+#include <linux/if_hsr.h>
 #include <linux/netpoll.h>
 #include <linux/ptp_classify.h>
 
@@ -1935,6 +1936,19 @@  static int dsa_slave_changeupper(struct net_device *dev,
 			dsa_port_lag_leave(dp, info->upper_dev);
 			err = NOTIFY_OK;
 		}
+	} else if (is_hsr_master(info->upper_dev)) {
+		if (info->linking) {
+			err = dsa_port_hsr_join(dp, info->upper_dev);
+			if (err == -EOPNOTSUPP) {
+				NL_SET_ERR_MSG_MOD(info->info.extack,
+						   "Offloading not supported");
+				err = 0;
+			}
+			err = notifier_from_errno(err);
+		} else {
+			dsa_port_hsr_leave(dp, info->upper_dev);
+			err = NOTIFY_OK;
+		}
 	}
 
 	return err;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 5026e4143663..1e0a65b307de 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -166,6 +166,24 @@  static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
 }
 
+static int dsa_switch_hsr_join(struct dsa_switch *ds,
+			       struct dsa_notifier_hsr_info *info)
+{
+	if (ds->index == info->sw_index && ds->ops->port_hsr_join)
+		return ds->ops->port_hsr_join(ds, info->port, info->hsr);
+
+	return 0;
+}
+
+static int dsa_switch_hsr_leave(struct dsa_switch *ds,
+				struct dsa_notifier_hsr_info *info)
+{
+	if (ds->index == info->sw_index && ds->ops->port_hsr_leave)
+		ds->ops->port_hsr_leave(ds, info->port, info->hsr);
+
+	return 0;
+}
+
 static int dsa_switch_lag_change(struct dsa_switch *ds,
 				 struct dsa_notifier_lag_info *info)
 {
@@ -371,6 +389,12 @@  static int dsa_switch_event(struct notifier_block *nb,
 	case DSA_NOTIFIER_FDB_DEL:
 		err = dsa_switch_fdb_del(ds, info);
 		break;
+	case DSA_NOTIFIER_HSR_JOIN:
+		err = dsa_switch_hsr_join(ds, info);
+		break;
+	case DSA_NOTIFIER_HSR_LEAVE:
+		err = dsa_switch_hsr_leave(ds, info);
+		break;
 	case DSA_NOTIFIER_LAG_CHANGE:
 		err = dsa_switch_lag_change(ds, info);
 		break;