diff mbox series

[net-next,5/6] sfc: neighbour lookup for TC encap action offload

Message ID 286b3685eabf6cdd98021215b9b00020b442a42b.1685992503.git.ecree.xilinx@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series sfc: TC encap actions offload | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api warning Found: 'dev_hold(' was: 0 now: 2; 'dev_put(' was: 0 now: 3
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 57 this patch: 57
netdev/source_inline success Was 0 now: 0

Commit Message

edward.cree@amd.com June 5, 2023, 7:17 p.m. UTC
From: Edward Cree <ecree.xilinx@gmail.com>

For each neighbour we're interested in, create a struct efx_neigh_binder
 object which has a list of all the encap_actions using it.  When we
 receive a neighbouring update (through the netevent notifier), find the
 corresponding efx_neigh_binder and update all its users.
Since the actual generation of encap headers is still only a stub, the
 resulting rules still get left on fallback actions.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
---
 drivers/net/ethernet/sfc/ef100_netdev.c     |  34 ++
 drivers/net/ethernet/sfc/net_driver.h       |   3 +
 drivers/net/ethernet/sfc/tc.c               |  12 +-
 drivers/net/ethernet/sfc/tc.h               |   7 +
 drivers/net/ethernet/sfc/tc_bindings.c      |  13 +
 drivers/net/ethernet/sfc/tc_bindings.h      |   2 +
 drivers/net/ethernet/sfc/tc_encap_actions.c | 444 +++++++++++++++++++-
 drivers/net/ethernet/sfc/tc_encap_actions.h |  52 +++
 8 files changed, 561 insertions(+), 6 deletions(-)

Comments

Pieter Jansen van Vuuren June 6, 2023, 10:54 a.m. UTC | #1
On 05/06/2023 20:17, edward.cree@amd.com wrote:
> From: Edward Cree <ecree.xilinx@gmail.com>
> 
> For each neighbour we're interested in, create a struct efx_neigh_binder
>  object which has a list of all the encap_actions using it.  When we
>  receive a neighbouring update (through the netevent notifier), find the
>  corresponding efx_neigh_binder and update all its users.
> Since the actual generation of encap headers is still only a stub, the
>  resulting rules still get left on fallback actions.
> 
> Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>

Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>

Jakub Kicinski June 7, 2023, 4:56 a.m. UTC | #2
On Mon, 5 Jun 2023 20:17:38 +0100 edward.cree@amd.com wrote:
> +			dev_hold(neigh->egdev = dst->dev);

Please use the ref-tracker enabled helpers in new code.
And the assignment on a separate line, please.

Simon Horman June 7, 2023, 8:34 a.m. UTC | #3
On Mon, Jun 05, 2023 at 08:17:38PM +0100, edward.cree@amd.com wrote:

...

> +static int efx_bind_neigh(struct efx_nic *efx,
> +			  struct efx_tc_encap_action *encap, struct net *net,
> +			  struct netlink_ext_ack *extack)
> +{
> +	struct efx_neigh_binder *neigh, *old;
> +	struct flowi6 flow6 = {};
> +	struct flowi4 flow4 = {};
> +	int rc;
> +
> +	/* GCC stupidly thinks that only values explicitly listed in the enum
> +	 * definition can _possibly_ be sensible case values, so without this
> +	 * cast it complains about the IPv6 versions.
> +	 */
> +	switch ((int)encap->type) {
> +	case EFX_ENCAP_TYPE_VXLAN:
> +	case EFX_ENCAP_TYPE_GENEVE:
> +		flow4.flowi4_proto = IPPROTO_UDP;
> +		flow4.fl4_dport = encap->key.tp_dst;
> +		flow4.flowi4_tos = encap->key.tos;
> +		flow4.daddr = encap->key.u.ipv4.dst;
> +		flow4.saddr = encap->key.u.ipv4.src;
> +		break;
> +	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
> +	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
> +		flow6.flowi6_proto = IPPROTO_UDP;
> +		flow6.fl6_dport = encap->key.tp_dst;
> +		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
> +						    encap->key.label);
> +		flow6.daddr = encap->key.u.ipv6.dst;
> +		flow6.saddr = encap->key.u.ipv6.src;
> +		break;
> +	default:
> +		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
> +				       (int)encap->type);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
> +	if (!neigh)
> +		return -ENOMEM;
> +	neigh->net = get_net(net);
> +	neigh->dst_ip = flow4.daddr;
> +	neigh->dst_ip6 = flow6.daddr;
> +
> +	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
> +						&neigh->linkage,
> +						efx_neigh_ht_params);
> +	if (old) {
> +		/* don't need our new entry */
> +		put_net(neigh->net);
> +		kfree(neigh);
> +		if (!refcount_inc_not_zero(&old->ref))
> +			return -EAGAIN;
> +		/* existing entry found, ref taken */
> +		neigh = old;
> +	} else {
> +		/* New entry.  We need to initiate a lookup */
> +		struct neighbour *n;
> +		struct rtable *rt;
> +
> +		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
> +#if IS_ENABLED(CONFIG_IPV6)
> +			struct dst_entry *dst;
> +
> +			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
> +							      NULL);
> +			rc = PTR_ERR_OR_ZERO(dst);
> +			if (rc) {
> +				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
> +				goto out_free;
> +			}
> +			dev_hold(neigh->egdev = dst->dev);
> +			neigh->ttl = ip6_dst_hoplimit(dst);
> +			n = dst_neigh_lookup(dst, &flow6.daddr);
> +			dst_release(dst);
> +#else
> +			/* We shouldn't ever get here, because if IPv6 isn't
> +			 * enabled how did someone create an IPv6 tunnel_key?
> +			 */
> +			rc = -EOPNOTSUPP;
> +			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
> +#endif
> +		} else {
> +			rt = ip_route_output_key(net, &flow4);
> +			if (IS_ERR_OR_NULL(rt)) {
> +				rc = PTR_ERR(rt);

Hi Edward,

A minor nit from my side: perhaps this should use PTR_ERR_OR_ZERO().

> +				if (!rc)
> +					rc = -EIO;
> +				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
> +				goto out_free;
> +			}
> +			dev_hold(neigh->egdev = rt->dst.dev);
> +			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
> +			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
> +			ip_rt_put(rt);
> +		}
> +		if (!n) {
> +			rc = -ENETUNREACH;
> +			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
> +			dev_put(neigh->egdev);
> +			goto out_free;
> +		}
> +		refcount_set(&neigh->ref, 1);
> +		INIT_LIST_HEAD(&neigh->users);
> +		read_lock_bh(&n->lock);
> +		ether_addr_copy(neigh->ha, n->ha);
> +		neigh->n_valid = n->nud_state & NUD_VALID;
> +		read_unlock_bh(&n->lock);
> +		rwlock_init(&neigh->lock);
> +		INIT_WORK(&neigh->work, efx_neigh_update);
> +		neigh->efx = efx;
> +		neigh->used = jiffies;
> +		if (!neigh->n_valid)
> +			/* Prod ARP to find us a neighbour */
> +			neigh_event_send(n, NULL);
> +		neigh_release(n);
> +	}
> +	/* Add us to this neigh */
> +	encap->neigh = neigh;
> +	list_add_tail(&encap->list, &neigh->users);
> +	return 0;
> +
> +out_free:
> +	/* cleanup common to several error paths */
> +	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
> +			       efx_neigh_ht_params);
> +	synchronize_rcu();
> +	put_net(net);
> +	kfree(neigh);
> +	return rc;
> +}

...

Edward Cree June 7, 2023, 8:51 p.m. UTC | #4
On 07/06/2023 05:56, Jakub Kicinski wrote:
> On Mon, 5 Jun 2023 20:17:38 +0100 edward.cree@amd.com wrote:
>> +			dev_hold(neigh->egdev = dst->dev);
> 
> Please use the ref-tracker enabled helpers in new code.

Fair point.  Guessing that applies to the netns reference as well?
(Though looking back I honestly can't remember why we need to hold
 the neigh->net reference for the life of efx_neigh_binder; but I
 presumably had some reason for it at the time and I'm leery of
 removing it now in case it's load-bearing.
 Chesterton's Fence and all that.)

> And the assignment on a separate line, please.

Will do.

-ed

Jakub Kicinski June 7, 2023, 11 p.m. UTC | #5
On Wed, 7 Jun 2023 21:51:35 +0100 Edward Cree wrote:
> Fair point.  Guessing that applies to the netns reference as well?

Yes. I should add that to the checker..

> (Though looking back I honestly can't remember why we need to hold
>  the neigh->net reference for the life of efx_neigh_binder; but I
>  presumably had some reason for it at the time and I'm leery of
>  removing it now in case it's load-bearing.
>  Chesterton's Fence and all that.)

You don't seem to be doing much with the reference, just holding it.
Your call.

diff mbox series

Patch

diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c
index 274f3a2562ad..7f7d560cb2b4 100644
--- a/drivers/net/ethernet/sfc/ef100_netdev.c
+++ b/drivers/net/ethernet/sfc/ef100_netdev.c
@@ -24,6 +24,7 @@ 
 #include "rx_common.h"
 #include "ef100_sriov.h"
 #include "tc_bindings.h"
+#include "tc_encap_actions.h"
 #include "efx_devlink.h"
 
 static void ef100_update_name(struct efx_nic *efx)
@@ -300,14 +301,38 @@  int ef100_netdev_event(struct notifier_block *this,
 {
 	struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier);
 	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
+	struct ef100_nic_data *nic_data = efx->nic_data;
+	int err;
 
 	if (efx->net_dev == net_dev &&
 	    (event == NETDEV_CHANGENAME || event == NETDEV_REGISTER))
 		ef100_update_name(efx);
 
+	if (!nic_data->grp_mae)
+		return NOTIFY_DONE;
+	err = efx_tc_netdev_event(efx, event, net_dev);
+	if (err & NOTIFY_STOP_MASK)
+		return err;
+
 	return NOTIFY_DONE;
 }
 
+static int ef100_netevent_event(struct notifier_block *this,
+				unsigned long event, void *ptr)
+{
+	struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier);
+	struct ef100_nic_data *nic_data = efx->nic_data;
+	int err;
+
+	if (!nic_data->grp_mae)
+		return NOTIFY_DONE;
+	err = efx_tc_netevent_event(efx, event, ptr);
+	if (err & NOTIFY_STOP_MASK)
+		return err;
+
+	return NOTIFY_DONE;
+};
+
 static int ef100_register_netdev(struct efx_nic *efx)
 {
 	struct net_device *net_dev = efx->net_dev;
@@ -367,6 +392,7 @@  void ef100_remove_netdev(struct efx_probe_data *probe_data)
 	rtnl_unlock();
 
 	unregister_netdevice_notifier(&efx->netdev_notifier);
+	unregister_netevent_notifier(&efx->netevent_notifier);
 #if defined(CONFIG_SFC_SRIOV)
 	if (!efx->type->is_vf)
 		efx_ef100_pci_sriov_disable(efx, true);
@@ -487,6 +513,14 @@  int ef100_probe_netdev(struct efx_probe_data *probe_data)
 		goto fail;
 	}
 
+	efx->netevent_notifier.notifier_call = ef100_netevent_event;
+	rc = register_netevent_notifier(&efx->netevent_notifier);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "Failed to register netevent notifier, rc=%d\n", rc);
+		goto fail;
+	}
+
 	efx_probe_devlink_unlock(efx);
 	return rc;
 fail:
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index fcd51d3992fa..a7a22b019794 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -27,6 +27,7 @@ 
 #include <linux/mtd/mtd.h>
 #include <net/busy_poll.h>
 #include <net/xdp.h>
+#include <net/netevent.h>
 
 #include "enum.h"
 #include "bitfield.h"
@@ -996,6 +997,7 @@  struct efx_mae;
  * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
  *      xdp_rxq_info structures?
  * @netdev_notifier: Netdevice notifier.
+ * @netevent_notifier: Netevent notifier (for neighbour updates).
  * @tc: state for TC offload (EF100).
  * @devlink: reference to devlink structure owned by this device
  * @dl_port: devlink port associated with the PF
@@ -1183,6 +1185,7 @@  struct efx_nic {
 	bool xdp_rxq_info_failed;
 
 	struct notifier_block netdev_notifier;
+	struct notifier_block netevent_notifier;
 	struct efx_tc_state *tc;
 
 	struct devlink *devlink;
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 4177feced3e6..77acdb60381e 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -34,8 +34,8 @@  enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
  * May return NULL for the PF (us), or an error pointer for a device that
  * isn't supported as a TC offload endpoint
  */
-static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
-						struct net_device *dev)
+struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
+					 struct net_device *dev)
 {
 	struct efx_rep *efv;
 
@@ -71,7 +71,7 @@  static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv
 }
 
 /* Convert a driver-internal vport ID into an external device (wire or VF) */
-static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
+s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
 {
 	u32 mport;
 
@@ -112,8 +112,10 @@  static void efx_tc_free_action_set(struct efx_nic *efx,
 	}
 	if (act->count)
 		efx_tc_flower_put_counter_index(efx, act->count);
-	if (act->encap_md)
+	if (act->encap_md) {
+		list_del(&act->encap_user);
 		efx_tc_flower_release_encap_md(efx, act->encap_md);
+	}
 	kfree(act);
 }
 
@@ -1115,6 +1117,7 @@  static int efx_tc_flower_replace(struct efx_nic *efx,
 					goto release;
 				}
 				act->encap_md = encap;
+				list_add_tail(&act->encap_user, &encap->users);
 				act->dest_mport = encap->dest_mport;
 				act->deliver = 1;
 				rc = efx_mae_alloc_action_set(efx, act);
@@ -1123,6 +1126,7 @@  static int efx_tc_flower_replace(struct efx_nic *efx,
 					goto release;
 				}
 				list_add_tail(&act->list, &rule->acts.list);
+				act->user = &rule->acts;
 				act = NULL;
 				if (fa->id == FLOW_ACTION_REDIRECT)
 					break; /* end of the line */
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 5a8f701b05c5..607429f8bb28 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -36,6 +36,8 @@  struct efx_tc_action_set {
 	__be16 vlan_proto[2]; /* Ethertypes for vlan_push */
 	struct efx_tc_counter_index *count;
 	struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */
+	struct list_head encap_user; /* entry on encap_md->users list */
+	struct efx_tc_action_set_list *user; /* Only populated if encap_md */
 	u32 dest_mport;
 	u32 fw_id; /* index of this entry in firmware actions table */
 	struct list_head list;
@@ -151,6 +153,7 @@  enum efx_tc_rule_prios {
  * @encap_ht: Hashtable of TC encap actions
  * @encap_match_ht: Hashtable of TC encap matches
  * @match_action_ht: Hashtable of TC match-action rules
+ * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder)
  * @reps_mport_id: MAE port allocated for representor RX
  * @reps_filter_uc: VNIC filter for representor unicast RX (promisc)
  * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti)
@@ -181,6 +184,7 @@  struct efx_tc_state {
 	struct rhashtable encap_ht;
 	struct rhashtable encap_match_ht;
 	struct rhashtable match_action_ht;
+	struct rhashtable neigh_ht;
 	u32 reps_mport_id, reps_mport_vport_id;
 	s32 reps_filter_uc, reps_filter_mc;
 	bool flush_counters;
@@ -201,6 +205,9 @@  struct efx_tc_state {
 struct efx_rep;
 
 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev);
+struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
+					 struct net_device *dev);
+s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv);
 int efx_tc_configure_default_rule_rep(struct efx_rep *efv);
 void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
 				     struct efx_tc_flow_rule *rule);
diff --git a/drivers/net/ethernet/sfc/tc_bindings.c b/drivers/net/ethernet/sfc/tc_bindings.c
index c18d64519c2d..1b79c535c54e 100644
--- a/drivers/net/ethernet/sfc/tc_bindings.c
+++ b/drivers/net/ethernet/sfc/tc_bindings.c
@@ -10,6 +10,7 @@ 
 
 #include "tc_bindings.h"
 #include "tc.h"
+#include "tc_encap_actions.h"
 
 struct efx_tc_block_binding {
 	struct list_head list;
@@ -226,3 +227,15 @@  int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type,
 
 	return -EOPNOTSUPP;
 }
+
+int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
+			struct net_device *net_dev)
+{
+	if (efx->type->is_vf)
+		return NOTIFY_DONE;
+
+	if (event == NETDEV_UNREGISTER)
+		efx_tc_unregister_egdev(efx, net_dev);
+
+	return NOTIFY_OK;
+}
diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h
index c210bb09150e..095ddeb59eb3 100644
--- a/drivers/net/ethernet/sfc/tc_bindings.h
+++ b/drivers/net/ethernet/sfc/tc_bindings.h
@@ -26,4 +26,6 @@  int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch,
 			 void *cb_priv, enum tc_setup_type type,
 			 void *type_data, void *data,
 			 void (*cleanup)(struct flow_block_cb *block_cb));
+int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
+			struct net_device *net_dev);
 #endif /* EFX_TC_BINDINGS_H */
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c
index c41493e659a3..601141190f42 100644
--- a/drivers/net/ethernet/sfc/tc_encap_actions.c
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.c
@@ -13,6 +13,14 @@ 
 #include "mae.h"
 #include <net/vxlan.h>
 #include <net/geneve.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+
+static const struct rhashtable_params efx_neigh_ht_params = {
+	.key_len	= offsetof(struct efx_neigh_binder, ha),
+	.key_offset	= 0,
+	.head_offset	= offsetof(struct efx_neigh_binder, linkage),
+};
 
 static const struct rhashtable_params efx_tc_encap_ht_params = {
 	.key_len	= offsetofend(struct efx_tc_encap_action, key),
@@ -28,9 +36,32 @@  static void efx_tc_encap_free(void *ptr, void *__unused)
 	kfree(enc);
 }
 
+static void efx_neigh_free(void *ptr, void *__unused)
+{
+	struct efx_neigh_binder *neigh = ptr;
+
+	WARN_ON(refcount_read(&neigh->ref));
+	WARN_ON(!list_empty(&neigh->users));
+	put_net(neigh->net);
+	dev_put(neigh->egdev);
+	kfree(neigh);
+}
+
 int efx_tc_init_encap_actions(struct efx_nic *efx)
 {
-	return rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
+	int rc;
+
+	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
+	if (rc < 0)
+		goto fail_neigh_ht;
+	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
+	if (rc < 0)
+		goto fail_encap_ht;
+	return 0;
+fail_encap_ht:
+	rhashtable_destroy(&efx->tc->neigh_ht);
+fail_neigh_ht:
+	return rc;
 }
 
 /* Only call this in init failure teardown.
@@ -39,11 +70,333 @@  int efx_tc_init_encap_actions(struct efx_nic *efx)
 void efx_tc_destroy_encap_actions(struct efx_nic *efx)
 {
 	rhashtable_destroy(&efx->tc->encap_ht);
+	rhashtable_destroy(&efx->tc->neigh_ht);
 }
 
 void efx_tc_fini_encap_actions(struct efx_nic *efx)
 {
 	rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
+	rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
+}
+
+static void efx_neigh_update(struct work_struct *work);
+
+static int efx_bind_neigh(struct efx_nic *efx,
+			  struct efx_tc_encap_action *encap, struct net *net,
+			  struct netlink_ext_ack *extack)
+{
+	struct efx_neigh_binder *neigh, *old;
+	struct flowi6 flow6 = {};
+	struct flowi4 flow4 = {};
+	int rc;
+
+	/* GCC stupidly thinks that only values explicitly listed in the enum
+	 * definition can _possibly_ be sensible case values, so without this
+	 * cast it complains about the IPv6 versions.
+	 */
+	switch ((int)encap->type) {
+	case EFX_ENCAP_TYPE_VXLAN:
+	case EFX_ENCAP_TYPE_GENEVE:
+		flow4.flowi4_proto = IPPROTO_UDP;
+		flow4.fl4_dport = encap->key.tp_dst;
+		flow4.flowi4_tos = encap->key.tos;
+		flow4.daddr = encap->key.u.ipv4.dst;
+		flow4.saddr = encap->key.u.ipv4.src;
+		break;
+	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
+	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
+		flow6.flowi6_proto = IPPROTO_UDP;
+		flow6.fl6_dport = encap->key.tp_dst;
+		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
+						    encap->key.label);
+		flow6.daddr = encap->key.u.ipv6.dst;
+		flow6.saddr = encap->key.u.ipv6.src;
+		break;
+	default:
+		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
+				       (int)encap->type);
+		return -EOPNOTSUPP;
+	}
+
+	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
+	if (!neigh)
+		return -ENOMEM;
+	neigh->net = get_net(net);
+	neigh->dst_ip = flow4.daddr;
+	neigh->dst_ip6 = flow6.daddr;
+
+	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
+						&neigh->linkage,
+						efx_neigh_ht_params);
+	if (old) {
+		/* don't need our new entry */
+		put_net(neigh->net);
+		kfree(neigh);
+		if (!refcount_inc_not_zero(&old->ref))
+			return -EAGAIN;
+		/* existing entry found, ref taken */
+		neigh = old;
+	} else {
+		/* New entry.  We need to initiate a lookup */
+		struct neighbour *n;
+		struct rtable *rt;
+
+		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
+#if IS_ENABLED(CONFIG_IPV6)
+			struct dst_entry *dst;
+
+			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
+							      NULL);
+			rc = PTR_ERR_OR_ZERO(dst);
+			if (rc) {
+				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
+				goto out_free;
+			}
+			dev_hold(neigh->egdev = dst->dev);
+			neigh->ttl = ip6_dst_hoplimit(dst);
+			n = dst_neigh_lookup(dst, &flow6.daddr);
+			dst_release(dst);
+#else
+			/* We shouldn't ever get here, because if IPv6 isn't
+			 * enabled how did someone create an IPv6 tunnel_key?
+			 */
+			rc = -EOPNOTSUPP;
+			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
+#endif
+		} else {
+			rt = ip_route_output_key(net, &flow4);
+			if (IS_ERR_OR_NULL(rt)) {
+				rc = PTR_ERR(rt);
+				if (!rc)
+					rc = -EIO;
+				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
+				goto out_free;
+			}
+			dev_hold(neigh->egdev = rt->dst.dev);
+			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
+			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
+			ip_rt_put(rt);
+		}
+		if (!n) {
+			rc = -ENETUNREACH;
+			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
+			dev_put(neigh->egdev);
+			goto out_free;
+		}
+		refcount_set(&neigh->ref, 1);
+		INIT_LIST_HEAD(&neigh->users);
+		read_lock_bh(&n->lock);
+		ether_addr_copy(neigh->ha, n->ha);
+		neigh->n_valid = n->nud_state & NUD_VALID;
+		read_unlock_bh(&n->lock);
+		rwlock_init(&neigh->lock);
+		INIT_WORK(&neigh->work, efx_neigh_update);
+		neigh->efx = efx;
+		neigh->used = jiffies;
+		if (!neigh->n_valid)
+			/* Prod ARP to find us a neighbour */
+			neigh_event_send(n, NULL);
+		neigh_release(n);
+	}
+	/* Add us to this neigh */
+	encap->neigh = neigh;
+	list_add_tail(&encap->list, &neigh->users);
+	return 0;
+
+out_free:
+	/* cleanup common to several error paths */
+	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+			       efx_neigh_ht_params);
+	synchronize_rcu();
+	put_net(net);
+	kfree(neigh);
+	return rc;
+}
+
+static void efx_free_neigh(struct efx_neigh_binder *neigh)
+{
+	struct efx_nic *efx = neigh->efx;
+
+	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+			       efx_neigh_ht_params);
+	synchronize_rcu();
+	dev_put(neigh->egdev);
+	put_net(neigh->net);
+	kfree(neigh);
+}
+
+static void efx_release_neigh(struct efx_nic *efx,
+			      struct efx_tc_encap_action *encap)
+{
+	struct efx_neigh_binder *neigh = encap->neigh;
+
+	if (!neigh)
+		return;
+	list_del(&encap->list);
+	encap->neigh = NULL;
+	if (!refcount_dec_and_test(&neigh->ref))
+		return; /* still in use */
+	efx_free_neigh(neigh);
+}
+
+static void efx_gen_encap_header(struct efx_tc_encap_action *encap)
+{
+	/* stub for now */
+	encap->n_valid = false;
+	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
+	encap->encap_hdr_len = ETH_HLEN;
+}
+
+static void efx_tc_update_encap(struct efx_nic *efx,
+				struct efx_tc_encap_action *encap)
+{
+	struct efx_tc_action_set_list *acts, *fallback;
+	struct efx_tc_flow_rule *rule;
+	struct efx_tc_action_set *act;
+	int rc;
+
+	if (encap->n_valid) {
+		/* Make sure no rules are using this encap while we change it */
+		list_for_each_entry(act, &encap->users, encap_user) {
+			acts = act->user;
+			if (WARN_ON(!acts)) /* can't happen */
+				continue;
+			rule = container_of(acts, struct efx_tc_flow_rule, acts);
+			if (rule->fallback)
+				fallback = rule->fallback;
+			else /* fallback fallback: deliver to PF */
+				fallback = &efx->tc->facts.pf;
+			rc = efx_mae_update_rule(efx, fallback->fw_id,
+						 rule->fw_id);
+			if (rc)
+				netif_err(efx, drv, efx->net_dev,
+					  "Failed to update (f) rule %08x rc %d\n",
+					  rule->fw_id, rc);
+			else
+				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
+					  rule->fw_id);
+		}
+	}
+
+	if (encap->neigh) {
+		read_lock_bh(&encap->neigh->lock);
+		efx_gen_encap_header(encap);
+		read_unlock_bh(&encap->neigh->lock);
+	} else {
+		encap->n_valid = false;
+		memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
+		encap->encap_hdr_len = ETH_HLEN;
+	}
+
+	rc = efx_mae_update_encap_md(efx, encap);
+	if (rc) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Failed to update encap hdr %08x rc %d\n",
+			  encap->fw_id, rc);
+		return;
+	}
+	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
+		  encap->fw_id);
+	if (!encap->n_valid)
+		return;
+	/* Update rule users: use the action if they are now ready */
+	list_for_each_entry(act, &encap->users, encap_user) {
+		acts = act->user;
+		if (WARN_ON(!acts)) /* can't happen */
+			continue;
+		rule = container_of(acts, struct efx_tc_flow_rule, acts);
+		if (!efx_tc_check_ready(efx, rule))
+			continue;
+		rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
+		if (rc)
+			netif_err(efx, drv, efx->net_dev,
+				  "Failed to update rule %08x rc %d\n",
+				  rule->fw_id, rc);
+		else
+			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
+				  rule->fw_id);
+	}
+}
+
+static void efx_neigh_update(struct work_struct *work)
+{
+	struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
+	struct efx_tc_encap_action *encap;
+	struct efx_nic *efx = neigh->efx;
+
+	mutex_lock(&efx->tc->mutex);
+	list_for_each_entry(encap, &neigh->users, list)
+		efx_tc_update_encap(neigh->efx, encap);
+	/* release ref taken in efx_neigh_event() */
+	if (refcount_dec_and_test(&neigh->ref))
+		efx_free_neigh(neigh);
+	mutex_unlock(&efx->tc->mutex);
+}
+
+static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
+{
+	struct efx_neigh_binder keys = {NULL}, *neigh;
+	bool n_valid, ipv6 = false;
+	char ha[ETH_ALEN];
+	size_t keysize;
+
+	if (WARN_ON(!efx->tc))
+		return NOTIFY_DONE;
+
+	if (n->tbl == &arp_tbl) {
+		keysize = sizeof(keys.dst_ip);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (n->tbl == ipv6_stub->nd_tbl) {
+		ipv6 = true;
+		keysize = sizeof(keys.dst_ip6);
+#endif
+	} else {
+		return NOTIFY_DONE;
+	}
+	if (!n->parms) {
+		netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
+		return NOTIFY_DONE;
+	}
+	keys.net = read_pnet(&n->parms->net);
+	if (n->tbl->key_len != keysize) {
+		netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
+			   n->tbl->key_len);
+		return NOTIFY_DONE;
+	}
+	read_lock_bh(&n->lock); /* Get a consistent view */
+	memcpy(ha, n->ha, ETH_ALEN);
+	n_valid = (n->nud_state & NUD_VALID) && !n->dead;
+	read_unlock_bh(&n->lock);
+	if (ipv6)
+		memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
+	else
+		memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
+	rcu_read_lock();
+	neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
+				       efx_neigh_ht_params);
+	if (!neigh || neigh->dying)
+		/* We're not interested in this neighbour */
+		goto done;
+	write_lock_bh(&neigh->lock);
+	if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
+		write_unlock_bh(&neigh->lock);
+		/* Nothing has changed; no work to do */
+		goto done;
+	}
+	neigh->n_valid = n_valid;
+	memcpy(neigh->ha, ha, ETH_ALEN);
+	write_unlock_bh(&neigh->lock);
+	if (refcount_inc_not_zero(&neigh->ref)) {
+		rcu_read_unlock();
+		if (!schedule_work(&neigh->work))
+			/* failed to schedule, release the ref we just took */
+			if (refcount_dec_and_test(&neigh->ref))
+				efx_free_neigh(neigh);
+	} else {
+done:
+		rcu_read_unlock();
+	}
+	return NOTIFY_DONE;
 }
 
 bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
@@ -54,7 +407,7 @@  bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
 	 * neighbour info for the outer Ethernet header.
 	 */
 	list_for_each_entry(act, &rule->acts.list, list)
-		if (act->encap_md) /* neigh bindings not implemented yet */
+		if (act->encap_md && !act->encap_md->n_valid)
 			return false;
 	return true;
 }
@@ -65,6 +418,7 @@  struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
 {
 	enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
 	struct efx_tc_encap_action *encap, *old;
+	struct efx_rep *to_efv;
 	s64 rc;
 
 	if (type == EFX_ENCAP_TYPE_NONE) {
@@ -98,6 +452,7 @@  struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
 		return ERR_PTR(-ENOMEM);
 	encap->type = type;
 	encap->key = info->key;
+	INIT_LIST_HEAD(&encap->users);
 	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
 						&encap->linkage,
 						efx_tc_encap_ht_params);
@@ -110,9 +465,42 @@  struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
 		return old;
 	}
 
+	rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
+	if (rc < 0)
+		goto out_remove;
+	to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
+	if (IS_ERR(to_efv)) {
+		/* neigh->egdev isn't ours */
+		NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
+		rc = PTR_ERR(to_efv);
+		goto out_release;
+	}
+	rc = efx_tc_flower_external_mport(efx, to_efv);
+	if (rc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
+		goto out_release;
+	}
+	encap->dest_mport = rc;
+	read_lock_bh(&encap->neigh->lock);
+	efx_gen_encap_header(encap);
+	read_unlock_bh(&encap->neigh->lock);
+
+	rc = efx_mae_allocate_encap_md(efx, encap);
+	if (rc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
+		goto out_release;
+	}
+
 	/* ref and return */
 	refcount_set(&encap->ref, 1);
 	return encap;
+out_release:
+	efx_release_neigh(efx, encap);
+out_remove:
+	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
+			       efx_tc_encap_ht_params);
+	kfree(encap);
+	return ERR_PTR(rc);
 }
 
 void efx_tc_flower_release_encap_md(struct efx_nic *efx,
@@ -120,7 +508,59 @@  void efx_tc_flower_release_encap_md(struct efx_nic *efx,
 {
 	if (!refcount_dec_and_test(&encap->ref))
 		return; /* still in use */
+	efx_release_neigh(efx, encap);
 	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
 			       efx_tc_encap_ht_params);
+	efx_mae_free_encap_md(efx, encap);
 	kfree(encap);
 }
+
+static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
+{	/* Release @neigh from every encap action on its users list, then refresh each action */
+	struct efx_tc_encap_action *encap, *next;
+
+	list_for_each_entry_safe(encap, next, &neigh->users, list) { /* NOTE(review): list head is inside @neigh; confirm it is not read again after the last release frees it */
+		/* Should cause neigh usage count to fall to zero, freeing it */
+		efx_release_neigh(efx, encap);
+		/* The encap has lost its neigh, so it's now unready */
+		efx_tc_update_encap(efx, encap);
+	}
+}
+
+void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
+{	/* Mark dying, and strip the users of, every neigh entry whose egdev is @net_dev */
+	struct efx_neigh_binder *neigh;
+	struct rhashtable_iter walk;
+
+	mutex_lock(&efx->tc->mutex); /* held across the whole table walk */
+	rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
+	rhashtable_walk_start(&walk);
+	while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
+		if (IS_ERR(neigh)) /* walker returned an error pointer, not an entry; keep walking */
+			continue;
+		if (neigh->egdev != net_dev) /* entry egresses via some other device */
+			continue;
+		neigh->dying = true;
+		rhashtable_walk_stop(&walk); /* pause the walk around the calls below that may sleep */
+		synchronize_rcu(); /* Make sure any updates see dying flag */
+		efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
+		rhashtable_walk_start(&walk); /* resume the walk for the next entry */
+	}
+	rhashtable_walk_stop(&walk);
+	rhashtable_walk_exit(&walk);
+	mutex_unlock(&efx->tc->mutex);
+}
+
+int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
+			  void *ptr)
+{	/* Dispatch a netevent; only NETEVENT_NEIGH_UPDATE is acted upon */
+	if (efx->type->is_vf) /* VFs do no neighbour handling here */
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETEVENT_NEIGH_UPDATE:
+		return efx_neigh_event(efx, ptr); /* @ptr is passed through unchanged */
+	default: /* every other event type is ignored */
+		return NOTIFY_DONE;
+	}
+}
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h
index 1a3679e81f09..79608d7e3deb 100644
--- a/drivers/net/ethernet/sfc/tc_encap_actions.h
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.h
@@ -15,6 +15,50 @@ 
 #include <linux/refcount.h>
 #include <net/tc_act/tc_tunnel_key.h>
 
+/**
+ * struct efx_neigh_binder - driver state for a neighbour entry
+ * @net: the network namespace in which this neigh resides
+ * @dst_ip: the IPv4 destination address resolved by this neigh
+ * @dst_ip6: the IPv6 destination address resolved by this neigh
+ * @ha: the hardware (Ethernet) address of the neighbour
+ * @n_valid: true if the neighbour is in NUD_VALID state
+ * @lock: protects @ha and @n_valid
+ * @ttl: Time To Live associated with the route used
+ * @dying: set when egdev is going away, to skip further updates
+ * @egdev: egress device from the route lookup.  Holds a reference
+ * @ref: counts encap actions referencing this entry
+ * @used: jiffies of last time traffic hit any encap action using this.
+ *      When counter reads update this, a new neighbour event is sent to
+ *      indicate that the neighbour entry is still in use.
+ * @users: list of &struct efx_tc_encap_action
+ * @linkage: entry in efx->tc->neigh_ht (keys are @net, @dst_ip, @dst_ip6).
+ * @work: processes neighbour state changes, updates the encap actions
+ * @efx: owning NIC instance.
+ *
+ * Associates a neighbour entry with the encap actions that are
+ * interested in it, allowing the latter to be updated when the
+ * neighbour details change.
+ * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes,
+ * this distinguishes IPv4 from IPv6 entries.
+ */
+struct efx_neigh_binder {
+	struct net *net;
+	__be32 dst_ip;
+	struct in6_addr dst_ip6;
+	char ha[ETH_ALEN];
+	bool n_valid;
+	rwlock_t lock;
+	u8 ttl;
+	bool dying;
+	struct net_device *egdev;
+	refcount_t ref;
+	unsigned long used;
+	struct list_head users;
+	struct rhash_head linkage;
+	struct work_struct work;
+	struct efx_nic *efx;
+};
+
 /* This limit is arbitrary; current hardware (SN1022) handles encap headers
  * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol.
  */
@@ -24,7 +68,11 @@  struct efx_tc_encap_action {
 	struct ip_tunnel_key key; /* 52 bytes */
 	u32 dest_mport; /* is copied into struct efx_tc_action_set */
 	u8 encap_hdr_len;
+	bool n_valid;
 	u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR];
+	struct efx_neigh_binder *neigh;
+	struct list_head list; /* entry on neigh->users list */
+	struct list_head users; /* action sets using this encap_md */
 	struct rhash_head linkage; /* efx->tc_encap_ht */
 	refcount_t ref;
 	u32 fw_id; /* index of this entry in firmware encap table */
@@ -44,4 +92,8 @@  struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
 void efx_tc_flower_release_encap_md(struct efx_nic *efx,
 				    struct efx_tc_encap_action *encap);
 
+void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev);
+int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
+			  void *ptr);
+
 #endif /* EFX_TC_ENCAP_ACTIONS_H */