diff mbox series

[net-next,5/5] sfc: add offloading of 'foreign' TC (decap) rules

Message ID a7aabdb45290f1cd50681eb9e1d610893fbce299.1678815095.git.ecree.xilinx@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series sfc: support TC decap rules | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 19 this patch: 21
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang fail Errors and warnings before: 18 this patch: 20
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 19 this patch: 21
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

edward.cree@amd.com March 14, 2023, 5:35 p.m. UTC
From: Edward Cree <ecree.xilinx@gmail.com>

A 'foreign' rule is one for which the net_dev is not the sfc netdevice
 or any of its representors.  The driver registers indirect flow blocks
 for tunnel netdevs so that it can offload decap rules.  For example:

    tc filter add dev vxlan0 parent ffff: protocol ipv4 flower \
        enc_src_ip 10.1.0.2 enc_dst_ip 10.1.0.1 \
        enc_key_id 1000 enc_dst_port 4789 \
        action tunnel_key unset \
        action mirred egress redirect dev $REPRESENTOR

When notified of a rule like this, register an encap match on the IP
 and dport tuple (creating an Outer Rule table entry) and insert an MAE
 action rule to perform the decapsulation and deliver to the representee.

Move efx_tc_delete_rule() below efx_tc_flower_release_encap_match() to
 avoid the need for a forward declaration.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
---
 drivers/net/ethernet/sfc/mae.c |  28 ++-
 drivers/net/ethernet/sfc/mae.h |   3 +
 drivers/net/ethernet/sfc/tc.c  | 359 +++++++++++++++++++++++++++++++--
 drivers/net/ethernet/sfc/tc.h  |   1 +
 4 files changed, 374 insertions(+), 17 deletions(-)

Comments

kernel test robot March 14, 2023, 8:29 p.m. UTC | #1
Hi,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20230314]
[cannot apply to net-next/master horms-ipvs/master linus/master v6.3-rc2 v6.3-rc1 v6.2 v6.3-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/edward-cree-amd-com/sfc-add-notion-of-match-on-enc-keys-to-MAE-machinery/20230315-013855
patch link:    https://lore.kernel.org/r/a7aabdb45290f1cd50681eb9e1d610893fbce299.1678815095.git.ecree.xilinx%40gmail.com
patch subject: [PATCH net-next 5/5] sfc: add offloading of 'foreign' TC (decap) rules
config: s390-allyesconfig (https://download.01.org/0day-ci/archive/20230315/202303150436.cQ46tTwI-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/51a99241aafeb3bd67a12aae5e9089c7aff2f3cd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review edward-cree-amd-com/sfc-add-notion-of-match-on-enc-keys-to-MAE-machinery/20230315-013855
        git checkout 51a99241aafeb3bd67a12aae5e9089c7aff2f3cd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash drivers/net/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303150436.cQ46tTwI-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/ethernet/sfc/tc.c:21:21: warning: no previous prototype for 'efx_tc_indr_netdev_type' [-Wmissing-prototypes]
      21 | enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
         |                     ^~~~~~~~~~~~~~~~~~~~~~~


vim +/efx_tc_indr_netdev_type +21 drivers/net/ethernet/sfc/tc.c

    20	
  > 21	enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
    22	{
    23		if (netif_is_vxlan(net_dev))
    24			return EFX_ENCAP_TYPE_VXLAN;
    25		if (netif_is_geneve(net_dev))
    26			return EFX_ENCAP_TYPE_GENEVE;
    27	
    28		return EFX_ENCAP_TYPE_NONE;
    29	}
    30
Michal Swiatkowski March 15, 2023, 10:11 a.m. UTC | #2
On Tue, Mar 14, 2023 at 05:35:25PM +0000, edward.cree@amd.com wrote:
> From: Edward Cree <ecree.xilinx@gmail.com>
> 
> A 'foreign' rule is one for which the net_dev is not the sfc netdevice
>  or any of its representors.  The driver registers indirect flow blocks
>  for tunnel netdevs so that it can offload decap rules.  For example:
> 
>     tc filter add dev vxlan0 parent ffff: protocol ipv4 flower \
>         enc_src_ip 10.1.0.2 enc_dst_ip 10.1.0.1 \
>         enc_key_id 1000 enc_dst_port 4789 \
>         action tunnel_key unset \
>         action mirred egress redirect dev $REPRESENTOR
> 
> When notified of a rule like this, register an encap match on the IP
>  and dport tuple (creating an Outer Rule table entry) and insert an MAE
>  action rule to perform the decapsulation and deliver to the representee.
> 
> Move efx_tc_delete_rule() below efx_tc_flower_release_encap_match() to
>  avoid the need for a forward declaration.
> 
> Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
> ---
>  drivers/net/ethernet/sfc/mae.c |  28 ++-
>  drivers/net/ethernet/sfc/mae.h |   3 +
>  drivers/net/ethernet/sfc/tc.c  | 359 +++++++++++++++++++++++++++++++--
>  drivers/net/ethernet/sfc/tc.h  |   1 +
>  4 files changed, 374 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
> index 754391eb575f..e8139076fcb0 100644
> --- a/drivers/net/ethernet/sfc/mae.c
> +++ b/drivers/net/ethernet/sfc/mae.c
> @@ -241,6 +241,7 @@ static int efx_mae_get_basic_caps(struct efx_nic *efx, struct mae_caps *caps)
>  	if (outlen < sizeof(outbuf))
>  		return -EIO;
>  	caps->match_field_count = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_MATCH_FIELD_COUNT);
> +	caps->encap_types = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_ENCAP_TYPES_SUPPORTED);
>  	caps->action_prios = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_ACTION_PRIOS);
>  	return 0;
>  }
> @@ -513,6 +514,28 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, unsigned char ipv,
>  }
>  #undef CHECK
>  
> +int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ)
> +{
> +	unsigned int bit;
> +
> +	switch (typ & EFX_ENCAP_TYPES_MASK) {
In 3/5 You ommited EFX_ENCAP_TYPE_NVGRE, was it intetional?

> +	case EFX_ENCAP_TYPE_VXLAN:
> +		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_VXLAN_LBN;
> +		break;
> +	case EFX_ENCAP_TYPE_NVGRE:
> +		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_NVGRE_LBN;
> +		break;
> +	case EFX_ENCAP_TYPE_GENEVE:
> +		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_GENEVE_LBN;
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +	if (efx->tc->caps->encap_types & BIT(bit))
> +		return 0;
> +	return -EOPNOTSUPP;
> +}
> +
>  int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt)
>  {
>  	MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_COUNTER_ALLOC_OUT_LEN(1));
> @@ -772,9 +795,10 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
>  	size_t outlen;
>  	int rc;
>  
> -	MCDI_POPULATE_DWORD_2(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
> +	MCDI_POPULATE_DWORD_3(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
>  			      MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push,
> -			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop);
> +			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop,
> +			      MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap);
>  
>  	MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
>  		       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
> diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h
> index 5b45138aaaf4..6cc96f8adfea 100644
> --- a/drivers/net/ethernet/sfc/mae.h
> +++ b/drivers/net/ethernet/sfc/mae.h
> @@ -70,6 +70,7 @@ void efx_mae_counters_grant_credits(struct work_struct *work);
>  
>  struct mae_caps {
>  	u32 match_field_count;
> +	u32 encap_types;
>  	u32 action_prios;
>  	u8 action_rule_fields[MAE_NUM_FIELDS];
>  	u8 outer_rule_fields[MAE_NUM_FIELDS];
> @@ -82,6 +83,8 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
>  			     struct netlink_ext_ack *extack);
>  int efx_mae_check_encap_match_caps(struct efx_nic *efx, unsigned char ipv,
>  				   struct netlink_ext_ack *extack);
> +int efx_mae_check_encap_type_supported(struct efx_nic *efx,
> +				       enum efx_encap_type typ);
>  
>  int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
>  int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
> diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
> index dc092403af12..8ccf25260312 100644
> --- a/drivers/net/ethernet/sfc/tc.c
> +++ b/drivers/net/ethernet/sfc/tc.c
> @@ -10,12 +10,24 @@
>   */
>  
>  #include <net/pkt_cls.h>
> +#include <net/vxlan.h>
> +#include <net/geneve.h>
>  #include "tc.h"
>  #include "tc_bindings.h"
>  #include "mae.h"
>  #include "ef100_rep.h"
>  #include "efx.h"
>  
> +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
> +{
> +	if (netif_is_vxlan(net_dev))
> +		return EFX_ENCAP_TYPE_VXLAN;
> +	if (netif_is_geneve(net_dev))
> +		return EFX_ENCAP_TYPE_GENEVE;
netif_is_gretap or NVGRE isn't supported?

> +
> +	return EFX_ENCAP_TYPE_NONE;
> +}
> +
>  #define EFX_EFV_PF	NULL
>  /* Look up the representor information (efv) for a device.
>   * May return NULL for the PF (us), or an error pointer for a device that
> @@ -43,6 +55,20 @@ static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
>  	return efv;
>  }
>  
> +/* Convert a driver-internal vport ID into an internal device (PF or VF) */
> +static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
> +{
> +	u32 mport;
> +
> +	if (IS_ERR(efv))
> +		return PTR_ERR(efv);
> +	if (!efv) /* device is PF (us) */
> +		efx_mae_mport_uplink(efx, &mport);
> +	else /* device is repr */
> +		efx_mae_mport_mport(efx, efv->mport, &mport);
> +	return mport;
> +}
> +
>  /* Convert a driver-internal vport ID into an external device (wire or VF) */
>  static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
>  {
> @@ -106,15 +132,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx,
>  	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
>  }
>  
> -static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
> -{
> -	efx_mae_delete_rule(efx, rule->fw_id);
> -
> -	/* Release entries in subsidiary tables */
> -	efx_tc_free_action_set_list(efx, &rule->acts, true);
> -	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
> -}
> -
>  static void efx_tc_flow_free(void *ptr, void *arg)
>  {
>  	struct efx_tc_flow_rule *rule = ptr;
> @@ -350,7 +367,6 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
>  	return 0;
>  }
>  
> -__always_unused
>  static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
>  					    struct efx_tc_match *match,
>  					    enum efx_encap_type type,
> @@ -479,7 +495,6 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
>  	return rc;
>  }
>  
> -__always_unused
>  static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
>  					      struct efx_tc_encap_match *encap)
>  {
> @@ -501,8 +516,38 @@ static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
>  	kfree(encap);
>  }
>  
> +static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
> +{
> +	efx_mae_delete_rule(efx, rule->fw_id);
> +
> +	/* Release entries in subsidiary tables */
> +	efx_tc_free_action_set_list(efx, &rule->acts, true);
> +	if (rule->match.encap)
> +		efx_tc_flower_release_encap_match(efx, rule->match.encap);
> +	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
> +}
> +
> +static const char *efx_tc_encap_type_name(enum efx_encap_type typ, char *buf,
> +					  size_t n)
> +{
> +	switch (typ) {
> +	case EFX_ENCAP_TYPE_NONE:
> +		return "none";
> +	case EFX_ENCAP_TYPE_VXLAN:
> +		return "vxlan";
> +	case EFX_ENCAP_TYPE_NVGRE:
> +		return "nvgre";
> +	case EFX_ENCAP_TYPE_GENEVE:
> +		return "geneve";
> +	default:
> +		snprintf(buf, n, "type %u\n", typ);
> +		return buf;
I will return unsupported here, instead of playing with buffer.

> +	}
> +}
> +
>  /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
Is it a device documentation? Where it can be find?

>  enum efx_tc_action_order {
> +	EFX_TC_AO_DECAP,
>  	EFX_TC_AO_VLAN_POP,
>  	EFX_TC_AO_VLAN_PUSH,
>  	EFX_TC_AO_COUNT,
> @@ -513,6 +558,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
>  					  enum efx_tc_action_order new)
>  {
>  	switch (new) {
> +	case EFX_TC_AO_DECAP:
> +		if (act->decap)
> +			return false;
> +		fallthrough;
>  	case EFX_TC_AO_VLAN_POP:
>  		if (act->vlan_pop >= 2)
>  			return false;
> @@ -540,6 +589,288 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
>  	}
>  }
>  
> +static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
> +					 struct net_device *net_dev,
> +					 struct flow_cls_offload *tc)
> +{
> +	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
> +	struct netlink_ext_ack *extack = tc->common.extack;
> +	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
> +	struct efx_tc_action_set *act = NULL;
> +	bool found = false, uplinked = false;
> +	const struct flow_action_entry *fa;
> +	struct efx_tc_match match;
> +	struct efx_rep *to_efv;
> +	s64 rc;
> +	int i;
> +
> +	/* Parse match */
> +	memset(&match, 0, sizeof(match));
> +	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
> +	if (rc)
> +		return rc;
> +	/* The rule as given to us doesn't specify a source netdevice.
> +	 * But, determining whether packets from a VF should match it is
> +	 * complicated, so leave those to the software slowpath: qualify
> +	 * the filter with source m-port == wire.
> +	 */
> +	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
Let's define extern_port as s64, a rc as int, it will be more readable I
think.

> +	if (rc < 0) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
> +		return rc;
> +	}
> +	match.value.ingress_port = rc;
> +	match.mask.ingress_port = ~0;
> +
> +	if (tc->common.chain_index) {
> +		NL_SET_ERR_MSG_MOD(extack, "No support for nonzero chain_index");
> +		return -EOPNOTSUPP;
> +	}
> +	match.mask.recirc_id = 0xff;
> +
> +	flow_action_for_each(i, fa, &fr->action) {
> +		switch (fa->id) {
> +		case FLOW_ACTION_REDIRECT:
> +		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
> +			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
> +			if (IS_ERR(to_efv))
> +				continue;
> +			found = true;
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +	if (!found) { /* We don't care. */
> +		netif_dbg(efx, drv, efx->net_dev,
> +			  "Ignoring foreign filter that doesn't egdev us\n");
> +		rc = -EOPNOTSUPP;
> +		goto release;
> +	}
> +
> +	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
> +	if (rc)
> +		goto release;
> +
> +	if (efx_tc_match_is_encap(&match.mask)) {
> +		enum efx_encap_type type;
> +
> +		type = efx_tc_indr_netdev_type(net_dev);
> +		if (type == EFX_ENCAP_TYPE_NONE) {
> +			NL_SET_ERR_MSG_MOD(extack,
> +					   "Egress encap match on unsupported tunnel device");
> +			rc = -EOPNOTSUPP;
> +			goto release;
> +		}
> +
> +		rc = efx_mae_check_encap_type_supported(efx, type);
> +		if (rc) {
> +			char errbuf[16];
> +
> +			NL_SET_ERR_MSG_FMT_MOD(extack,
> +					       "Firmware reports no support for %s encap match",
> +					       efx_tc_encap_type_name(type, errbuf,
> +								      sizeof(errbuf)));
> +			goto release;
> +		}
> +
> +		rc = efx_tc_flower_record_encap_match(efx, &match, type,
> +						      extack);
> +		if (rc)
> +			goto release;
> +	} else {
> +		/* This is not a tunnel decap rule, ignore it */
> +		netif_dbg(efx, drv, efx->net_dev,
> +			  "Ignoring foreign filter without encap match\n");
> +		rc = -EOPNOTSUPP;
> +		goto release;
> +	}
> +
> +	rule = kzalloc(sizeof(*rule), GFP_USER);
> +	if (!rule) {
> +		rc = -ENOMEM;
> +		goto release;
> +	}
> +	INIT_LIST_HEAD(&rule->acts.list);
> +	rule->cookie = tc->cookie;
> +	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
> +						&rule->linkage,
> +						efx_tc_match_action_ht_params);
> +	if (old) {
> +		netif_dbg(efx, drv, efx->net_dev,
> +			  "Ignoring already-offloaded rule (cookie %lx)\n",
> +			  tc->cookie);
> +		rc = -EEXIST;
> +		goto release;
> +	}
> +
> +	/* Parse actions */
> +	act = kzalloc(sizeof(*act), GFP_USER);
> +	if (!act) {
> +		rc = -ENOMEM;
> +		goto release;
> +	}
> +
> +	/* Parse actions.  For foreign rules we only support decap & redirect */
> +	flow_action_for_each(i, fa, &fr->action) {
> +		struct efx_tc_action_set save;
> +
> +		switch (fa->id) {
> +		case FLOW_ACTION_REDIRECT:
> +		case FLOW_ACTION_MIRRED:
> +			/* See corresponding code in efx_tc_flower_replace() for
> +			 * long explanations of what's going on here.
> +			 */
> +			save = *act;
Why save is needed here? In one bloick You are changing act, in other
save.

> +			if (fa->hw_stats) {
> +				struct efx_tc_counter_index *ctr;
> +
> +				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
> +					NL_SET_ERR_MSG_FMT_MOD(extack,
> +							       "hw_stats_type %u not supported (only 'delayed')",
> +							       fa->hw_stats);
> +					rc = -EOPNOTSUPP;
> +					goto release;
> +				}
> +				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
> +					rc = -EOPNOTSUPP;
> +					goto release;
> +				}
> +
> +				ctr = efx_tc_flower_get_counter_index(efx,
> +								      tc->cookie,
> +								      EFX_TC_COUNTER_TYPE_AR);
> +				if (IS_ERR(ctr)) {
> +					rc = PTR_ERR(ctr);
> +					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
> +					goto release;
> +				}
> +				act->count = ctr;
> +			}
> +
> +			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
> +				/* can't happen */
> +				rc = -EOPNOTSUPP;
> +				NL_SET_ERR_MSG_MOD(extack,
> +						   "Deliver action violates action order (can't happen)");
> +				goto release;
> +			}
> +			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
> +			/* PF implies egdev is us, in which case we really
> +			 * want to deliver to the uplink (because this is an
> +			 * ingress filter).  If we don't recognise the egdev
> +			 * at all, then we'd better trap so SW can handle it.
> +			 */
> +			if (IS_ERR(to_efv))
> +				to_efv = EFX_EFV_PF;
> +			if (to_efv == EFX_EFV_PF) {
> +				if (uplinked)
> +					break;
> +				uplinked = true;
> +			}
> +			rc = efx_tc_flower_internal_mport(efx, to_efv);
> +			if (rc < 0) {
> +				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
> +				goto release;
> +			}
> +			act->dest_mport = rc;
> +			act->deliver = 1;
> +			rc = efx_mae_alloc_action_set(efx, act);
> +			if (rc) {
> +				NL_SET_ERR_MSG_MOD(extack,
> +						   "Failed to write action set to hw (mirred)");
> +				goto release;
> +			}
> +			list_add_tail(&act->list, &rule->acts.list);
> +			act = NULL;
act was allocated, You need to free it, or maybe it is being cleared in
alloc_action_set()? However, this function is really hard to follow.
Please explain it more widely.

> +			if (fa->id == FLOW_ACTION_REDIRECT)
> +				break; /* end of the line */
> +			/* Mirror, so continue on with saved act */
> +			save.count = NULL;
> +			act = kzalloc(sizeof(*act), GFP_USER);
> +			if (!act) {
> +				rc = -ENOMEM;
> +				goto release;
> +			}
> +			*act = save;
> +			break;
> +		case FLOW_ACTION_TUNNEL_DECAP:
> +			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
> +				rc = -EINVAL;
> +				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
> +				goto release;
> +			}
> +			act->decap = 1;
> +			/* If we previously delivered/trapped to uplink, now
> +			 * that we've decapped we'll want another copy if we
> +			 * try to deliver/trap to uplink again.
> +			 */
> +			uplinked = false;
> +			break;
> +		default:
> +			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
> +					       fa->id);
> +			rc = -EOPNOTSUPP;
> +			goto release;
> +		}
> +	}
> +
> +	if (act) {
> +		if (!uplinked) {
> +			/* Not shot/redirected, so deliver to default dest (which is
> +			 * the uplink, as this is an ingress filter)
> +			 */
> +			efx_mae_mport_uplink(efx, &act->dest_mport);
> +			act->deliver = 1;
> +		}
> +		rc = efx_mae_alloc_action_set(efx, act);
> +		if (rc) {
> +			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
> +			goto release;
> +		}
> +		list_add_tail(&act->list, &rule->acts.list);
> +		act = NULL; /* Prevent double-free in error path */
> +	}
> +
> +	rule->match = match;
> +
> +	netif_dbg(efx, drv, efx->net_dev,
> +		  "Successfully parsed foreign filter (cookie %lx)\n",
> +		  tc->cookie);
> +
> +	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
> +	if (rc) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
> +		goto release;
> +	}
> +	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
> +				 rule->acts.fw_id, &rule->fw_id);
> +	if (rc) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
> +		goto release_act;
> +	}
act is saved somewhere?

> +	return 0;
> +
> +release_act:
> +	efx_mae_free_action_set_list(efx, &rule->acts);
> +release:
> +	/* We failed to insert the rule, so free up any entries we created in
> +	 * subsidiary tables.
> +	 */
> +	if (act)
> +		efx_tc_free_action_set(efx, act, false);
> +	if (rule) {
> +		rhashtable_remove_fast(&efx->tc->match_action_ht,
> +				       &rule->linkage,
> +				       efx_tc_match_action_ht_params);
> +		efx_tc_free_action_set_list(efx, &rule->acts, false);
> +	}
> +	kfree(rule);
> +	if (match.encap)
> +		efx_tc_flower_release_encap_match(efx, match.encap);
> +	return rc;
> +}
> +
>  static int efx_tc_flower_replace(struct efx_nic *efx,
>  				 struct net_device *net_dev,
>  				 struct flow_cls_offload *tc,
> @@ -564,10 +895,8 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
>  
>  	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
>  	if (IS_ERR(from_efv)) {
> -		/* Might be a tunnel decap rule from an indirect block.
> -		 * Support for those not implemented yet.
> -		 */
> -		return -EOPNOTSUPP;
> +		/* Not from our PF or representors, so probably a tunnel dev */
> +		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
>  	}
>  
>  	if (efv != from_efv) {
> diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
> index d70c0ba86669..47b6e9e35808 100644
> --- a/drivers/net/ethernet/sfc/tc.h
> +++ b/drivers/net/ethernet/sfc/tc.h
> @@ -28,6 +28,7 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
>  struct efx_tc_action_set {
>  	u16 vlan_push:2;
>  	u16 vlan_pop:2;
> +	u16 decap:1;
>  	u16 deliver:1;
>  	__be16 vlan_tci[2]; /* TCIs for vlan_push */
>  	__be16 vlan_proto[2]; /* Ethertypes for vlan_push */
Edward Cree March 15, 2023, 2:43 p.m. UTC | #3
On 15/03/2023 10:11, Michal Swiatkowski wrote:
> On Tue, Mar 14, 2023 at 05:35:25PM +0000, edward.cree@amd.com wrote:
>> +int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ)
>> +{
>> +	unsigned int bit;
>> +
>> +	switch (typ & EFX_ENCAP_TYPES_MASK) {
> In 3/5 You ommited EFX_ENCAP_TYPE_NVGRE, was it intetional?

No, I'll go back and add it.

>> +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
>> +{
>> +	if (netif_is_vxlan(net_dev))
>> +		return EFX_ENCAP_TYPE_VXLAN;
>> +	if (netif_is_geneve(net_dev))
>> +		return EFX_ENCAP_TYPE_GENEVE;
> netif_is_gretap or NVGRE isn't supported?

It should be supported, the hardware can handle it.
I'll add it in v2, and test to make sure it actually works ;)

>> +static const char *efx_tc_encap_type_name(enum efx_encap_type typ, char *buf,
>> +					  size_t n)
>> +{
>> +	switch (typ) {
>> +	case EFX_ENCAP_TYPE_NONE:
>> +		return "none";
>> +	case EFX_ENCAP_TYPE_VXLAN:
>> +		return "vxlan";
>> +	case EFX_ENCAP_TYPE_NVGRE:
>> +		return "nvgre";
>> +	case EFX_ENCAP_TYPE_GENEVE:
>> +		return "geneve";
>> +	default:
>> +		snprintf(buf, n, "type %u\n", typ);
>> +		return buf;
> I will return unsupported here, instead of playing with buffer.

Hmm, maybe if I add a one-time netif_warn()?  I don't like the
 idea of not getting the bogus value out anywhere where it can
 be debugged.

>>  /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
> Is it a device documentation? Where it can be find?

Ahh, turns out this document isn't public :(  I'll see if we
 can get this section of it split out into something public.

> 
>>  enum efx_tc_action_order {
>> +	EFX_TC_AO_DECAP,
>>  	EFX_TC_AO_VLAN_POP,
>>  	EFX_TC_AO_VLAN_PUSH,
>>  	EFX_TC_AO_COUNT,
>> @@ -513,6 +558,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
>>  					  enum efx_tc_action_order new)
>>  {
>>  	switch (new) {
>> +	case EFX_TC_AO_DECAP:
>> +		if (act->decap)
>> +			return false;
>> +		fallthrough;
>>  	case EFX_TC_AO_VLAN_POP:
>>  		if (act->vlan_pop >= 2)
>>  			return false;
>> @@ -540,6 +589,288 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
>>  	}
>>  }
>>  
>> +static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
>> +					 struct net_device *net_dev,
>> +					 struct flow_cls_offload *tc)
>> +{
>> +	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
>> +	struct netlink_ext_ack *extack = tc->common.extack;
>> +	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
>> +	struct efx_tc_action_set *act = NULL;
>> +	bool found = false, uplinked = false;
>> +	const struct flow_action_entry *fa;
>> +	struct efx_tc_match match;
>> +	struct efx_rep *to_efv;
>> +	s64 rc;
>> +	int i;
>> +
>> +	/* Parse match */
>> +	memset(&match, 0, sizeof(match));
>> +	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
>> +	if (rc)
>> +		return rc;
>> +	/* The rule as given to us doesn't specify a source netdevice.
>> +	 * But, determining whether packets from a VF should match it is
>> +	 * complicated, so leave those to the software slowpath: qualify
>> +	 * the filter with source m-port == wire.
>> +	 */
>> +	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
> Let's define extern_port as s64, a rc as int, it will be more readable I
> think.

Will it?  I feel it looks more natural this way — this is the way
 it would be written if mports always fitted into an int; the fact
 that they're actually u32 and thus the value needs to be wider to
 hold either an mport or an -errno is locally irrelevant to the
 reader.

>> +	/* Parse actions.  For foreign rules we only support decap & redirect */
>> +	flow_action_for_each(i, fa, &fr->action) {
>> +		struct efx_tc_action_set save;
>> +
>> +		switch (fa->id) {
>> +		case FLOW_ACTION_REDIRECT:
>> +		case FLOW_ACTION_MIRRED:
>> +			/* See corresponding code in efx_tc_flower_replace() for
>> +			 * long explanations of what's going on here.
>> +			 */
>> +			save = *act;
> Why save is needed here? In one bloick You are changing act, in other
> save.

Below, we set:

>> +			act->dest_mport = rc;
>> +			act->deliver = 1;

but then if this action is a mirred egress mirror, we don't want the
 next action-set in the action-set-list to deliver to the same place.
So we save the action state (collection of mutations applied to the
 packet) before the mirred, so that we can resume from where we left
 off:

>> +			/* Mirror, so continue on with saved act */
>> +			save.count = NULL;
>> +			act = kzalloc(sizeof(*act), GFP_USER);
>> +			if (!act) {
>> +				rc = -ENOMEM;
>> +				goto release;
>> +			}
>> +			*act = save;

The setting of save.count is a copy-paste error; that's needed in
 the non-tunnel case (efx_tc_flower_replace()) since that has a
 separate block for attaching counters (handling the case of
 FLOW_ACTION_DROP which we don't accept here in
 efx_tc_flower_replace_foreign()), but it's not needed here; maybe
 that was part of the confusion.

>> +			rc = efx_mae_alloc_action_set(efx, act);
>> +			if (rc) {
>> +				NL_SET_ERR_MSG_MOD(extack,
>> +						   "Failed to write action set to hw (mirred)");
>> +				goto release;
>> +			}
>> +			list_add_tail(&act->list, &rule->acts.list);
>> +			act = NULL;
> act was allocated, You need to free it, or maybe it is being cleared in
> alloc_action_set()? However, this function is really hard to follow.
> Please explain it more widely.

The list_add_tail attaches act (the action-set) to the list in
 rule->acts (the action-set-list), meaning it will be freed when the
 action-set-list is destroyed by efx_tc_free_action_set_list(),
 either in this function's failure ladder or in efx_tc_delete_rule().
I will try and write an extended comment under /* Parse actions */ to
 explain the use of this act 'cursor'.  Or rather, I'll add the
 comment to the efx_tc_flower_replace() equivalent, which works the
 same way, and reference it from here.

>> +	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
>> +				 rule->acts.fw_id, &rule->fw_id);
>> +	if (rc) {
>> +		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
>> +		goto release_act;
>> +	}
> act is saved somewhere?

Sorry, this should be called `release_acts`, as its job is to
 remove the action-set-list (rule->acts) from hardware.  Will fix.

Thanks for the very thorough review of this series :)
Edward Cree March 22, 2023, 10:35 p.m. UTC | #4
On 15/03/2023 14:43, Edward Cree wrote:
> On 15/03/2023 10:11, Michal Swiatkowski wrote:
>> On Tue, Mar 14, 2023 at 05:35:25PM +0000, edward.cree@amd.com wrote:
>>> +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
>>> +{
>>> +	if (netif_is_vxlan(net_dev))
>>> +		return EFX_ENCAP_TYPE_VXLAN;
>>> +	if (netif_is_geneve(net_dev))
>>> +		return EFX_ENCAP_TYPE_GENEVE;
>> netif_is_gretap or NVGRE isn't supported?
> 
> It should be supported, the hardware can handle it.
> I'll add it in v2, and test to make sure it actually works ;)

Fun discovery: while the hardware supports NVGRE, it can *only*
 match on the VSID field, not the whole GRE Key.
TC flower, meanwhile, neither knows nor cares about NVGRE; gretap
 decap rules expect to match on the full 32-bit Key field, and you
 can't even mask them (there's no TCA_FLOWER_KEY_ENC_KEY_ID_MASK
 in the uAPI), meaning the driver can't just require the FlowID is
 masked out and shift the rest.

So enabling this support is nontrivial; I've decided to leave it
 out of the series and just remove all mention of NVGRE for now.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index 754391eb575f..e8139076fcb0 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -241,6 +241,7 @@  static int efx_mae_get_basic_caps(struct efx_nic *efx, struct mae_caps *caps)
 	if (outlen < sizeof(outbuf))
 		return -EIO;
 	caps->match_field_count = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_MATCH_FIELD_COUNT);
+	caps->encap_types = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_ENCAP_TYPES_SUPPORTED);
 	caps->action_prios = MCDI_DWORD(outbuf, MAE_GET_CAPS_OUT_ACTION_PRIOS);
 	return 0;
 }
@@ -513,6 +514,28 @@  int efx_mae_check_encap_match_caps(struct efx_nic *efx, unsigned char ipv,
 }
 #undef CHECK
 
+int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ)
+{
+	unsigned int bit;
+
+	switch (typ & EFX_ENCAP_TYPES_MASK) {
+	case EFX_ENCAP_TYPE_VXLAN:
+		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_VXLAN_LBN;
+		break;
+	case EFX_ENCAP_TYPE_NVGRE:
+		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_NVGRE_LBN;
+		break;
+	case EFX_ENCAP_TYPE_GENEVE:
+		bit = MC_CMD_MAE_GET_CAPS_OUT_ENCAP_TYPE_GENEVE_LBN;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	if (efx->tc->caps->encap_types & BIT(bit))
+		return 0;
+	return -EOPNOTSUPP;
+}
+
 int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_COUNTER_ALLOC_OUT_LEN(1));
@@ -772,9 +795,10 @@  int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
 	size_t outlen;
 	int rc;
 
-	MCDI_POPULATE_DWORD_2(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
+	MCDI_POPULATE_DWORD_3(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
 			      MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push,
-			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop);
+			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop,
+			      MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap);
 
 	MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
 		       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h
index 5b45138aaaf4..6cc96f8adfea 100644
--- a/drivers/net/ethernet/sfc/mae.h
+++ b/drivers/net/ethernet/sfc/mae.h
@@ -70,6 +70,7 @@  void efx_mae_counters_grant_credits(struct work_struct *work);
 
 struct mae_caps {
 	u32 match_field_count;
+	u32 encap_types;
 	u32 action_prios;
 	u8 action_rule_fields[MAE_NUM_FIELDS];
 	u8 outer_rule_fields[MAE_NUM_FIELDS];
@@ -82,6 +83,8 @@  int efx_mae_match_check_caps(struct efx_nic *efx,
 			     struct netlink_ext_ack *extack);
 int efx_mae_check_encap_match_caps(struct efx_nic *efx, unsigned char ipv,
 				   struct netlink_ext_ack *extack);
+int efx_mae_check_encap_type_supported(struct efx_nic *efx,
+				       enum efx_encap_type typ);
 
 int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
 int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index dc092403af12..8ccf25260312 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -10,12 +10,24 @@ 
  */
 
 #include <net/pkt_cls.h>
+#include <net/vxlan.h>
+#include <net/geneve.h>
 #include "tc.h"
 #include "tc_bindings.h"
 #include "mae.h"
 #include "ef100_rep.h"
 #include "efx.h"
 
+enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
+{
+	if (netif_is_vxlan(net_dev))
+		return EFX_ENCAP_TYPE_VXLAN;
+	if (netif_is_geneve(net_dev))
+		return EFX_ENCAP_TYPE_GENEVE;
+
+	return EFX_ENCAP_TYPE_NONE;
+}
+
 #define EFX_EFV_PF	NULL
 /* Look up the representor information (efv) for a device.
  * May return NULL for the PF (us), or an error pointer for a device that
@@ -43,6 +55,20 @@  static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
 	return efv;
 }
 
+/* Convert a driver-internal vport ID into an internal device (PF or VF) */
+static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
+{
+	u32 mport;
+
+	if (IS_ERR(efv))
+		return PTR_ERR(efv);
+	if (!efv) /* device is PF (us) */
+		efx_mae_mport_uplink(efx, &mport);
+	else /* device is repr */
+		efx_mae_mport_mport(efx, efv->mport, &mport);
+	return mport;
+}
+
 /* Convert a driver-internal vport ID into an external device (wire or VF) */
 static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
 {
@@ -106,15 +132,6 @@  static void efx_tc_free_action_set_list(struct efx_nic *efx,
 	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
 }
 
-static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
-{
-	efx_mae_delete_rule(efx, rule->fw_id);
-
-	/* Release entries in subsidiary tables */
-	efx_tc_free_action_set_list(efx, &rule->acts, true);
-	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
-}
-
 static void efx_tc_flow_free(void *ptr, void *arg)
 {
 	struct efx_tc_flow_rule *rule = ptr;
@@ -350,7 +367,6 @@  static int efx_tc_flower_parse_match(struct efx_nic *efx,
 	return 0;
 }
 
-__always_unused
 static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
 					    struct efx_tc_match *match,
 					    enum efx_encap_type type,
@@ -479,7 +495,6 @@  static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
 	return rc;
 }
 
-__always_unused
 static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
 					      struct efx_tc_encap_match *encap)
 {
@@ -501,8 +516,38 @@  static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
 	kfree(encap);
 }
 
+static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
+{
+	efx_mae_delete_rule(efx, rule->fw_id);
+
+	/* Release entries in subsidiary tables */
+	efx_tc_free_action_set_list(efx, &rule->acts, true);
+	if (rule->match.encap)
+		efx_tc_flower_release_encap_match(efx, rule->match.encap);
+	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
+}
+
+static const char *efx_tc_encap_type_name(enum efx_encap_type typ, char *buf,
+					  size_t n)
+{
+	switch (typ) {
+	case EFX_ENCAP_TYPE_NONE:
+		return "none";
+	case EFX_ENCAP_TYPE_VXLAN:
+		return "vxlan";
+	case EFX_ENCAP_TYPE_NVGRE:
+		return "nvgre";
+	case EFX_ENCAP_TYPE_GENEVE:
+		return "geneve";
+	default:
+		snprintf(buf, n, "type %u\n", typ);
+		return buf;
+	}
+}
+
 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
 enum efx_tc_action_order {
+	EFX_TC_AO_DECAP,
 	EFX_TC_AO_VLAN_POP,
 	EFX_TC_AO_VLAN_PUSH,
 	EFX_TC_AO_COUNT,
@@ -513,6 +558,10 @@  static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 					  enum efx_tc_action_order new)
 {
 	switch (new) {
+	case EFX_TC_AO_DECAP:
+		if (act->decap)
+			return false;
+		fallthrough;
 	case EFX_TC_AO_VLAN_POP:
 		if (act->vlan_pop >= 2)
 			return false;
@@ -540,6 +589,288 @@  static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 	}
 }
 
+static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
+					 struct net_device *net_dev,
+					 struct flow_cls_offload *tc)
+{
+	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
+	struct netlink_ext_ack *extack = tc->common.extack;
+	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
+	struct efx_tc_action_set *act = NULL;
+	bool found = false, uplinked = false;
+	const struct flow_action_entry *fa;
+	struct efx_tc_match match;
+	struct efx_rep *to_efv;
+	s64 rc;
+	int i;
+
+	/* Parse match */
+	memset(&match, 0, sizeof(match));
+	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
+	if (rc)
+		return rc;
+	/* The rule as given to us doesn't specify a source netdevice.
+	 * But, determining whether packets from a VF should match it is
+	 * complicated, so leave those to the software slowpath: qualify
+	 * the filter with source m-port == wire.
+	 */
+	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
+	if (rc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
+		return rc;
+	}
+	match.value.ingress_port = rc;
+	match.mask.ingress_port = ~0;
+
+	if (tc->common.chain_index) {
+		NL_SET_ERR_MSG_MOD(extack, "No support for nonzero chain_index");
+		return -EOPNOTSUPP;
+	}
+	match.mask.recirc_id = 0xff;
+
+	flow_action_for_each(i, fa, &fr->action) {
+		switch (fa->id) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
+			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
+			if (IS_ERR(to_efv))
+				continue;
+			found = true;
+			break;
+		default:
+			break;
+		}
+	}
+	if (!found) { /* We don't care. */
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Ignoring foreign filter that doesn't egdev us\n");
+		rc = -EOPNOTSUPP;
+		goto release;
+	}
+
+	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
+	if (rc)
+		goto release;
+
+	if (efx_tc_match_is_encap(&match.mask)) {
+		enum efx_encap_type type;
+
+		type = efx_tc_indr_netdev_type(net_dev);
+		if (type == EFX_ENCAP_TYPE_NONE) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Egress encap match on unsupported tunnel device");
+			rc = -EOPNOTSUPP;
+			goto release;
+		}
+
+		rc = efx_mae_check_encap_type_supported(efx, type);
+		if (rc) {
+			char errbuf[16];
+
+			NL_SET_ERR_MSG_FMT_MOD(extack,
+					       "Firmware reports no support for %s encap match",
+					       efx_tc_encap_type_name(type, errbuf,
+								      sizeof(errbuf)));
+			goto release;
+		}
+
+		rc = efx_tc_flower_record_encap_match(efx, &match, type,
+						      extack);
+		if (rc)
+			goto release;
+	} else {
+		/* This is not a tunnel decap rule, ignore it */
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Ignoring foreign filter without encap match\n");
+		rc = -EOPNOTSUPP;
+		goto release;
+	}
+
+	rule = kzalloc(sizeof(*rule), GFP_USER);
+	if (!rule) {
+		rc = -ENOMEM;
+		goto release;
+	}
+	INIT_LIST_HEAD(&rule->acts.list);
+	rule->cookie = tc->cookie;
+	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
+						&rule->linkage,
+						efx_tc_match_action_ht_params);
+	if (old) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Ignoring already-offloaded rule (cookie %lx)\n",
+			  tc->cookie);
+		rc = -EEXIST;
+		goto release;
+	}
+
+	/* Parse actions */
+	act = kzalloc(sizeof(*act), GFP_USER);
+	if (!act) {
+		rc = -ENOMEM;
+		goto release;
+	}
+
+	/* Parse actions.  For foreign rules we only support decap & redirect */
+	flow_action_for_each(i, fa, &fr->action) {
+		struct efx_tc_action_set save;
+
+		switch (fa->id) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED:
+			/* See corresponding code in efx_tc_flower_replace() for
+			 * long explanations of what's going on here.
+			 */
+			save = *act;
+			if (fa->hw_stats) {
+				struct efx_tc_counter_index *ctr;
+
+				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
+					NL_SET_ERR_MSG_FMT_MOD(extack,
+							       "hw_stats_type %u not supported (only 'delayed')",
+							       fa->hw_stats);
+					rc = -EOPNOTSUPP;
+					goto release;
+				}
+				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
+					rc = -EOPNOTSUPP;
+					goto release;
+				}
+
+				ctr = efx_tc_flower_get_counter_index(efx,
+								      tc->cookie,
+								      EFX_TC_COUNTER_TYPE_AR);
+				if (IS_ERR(ctr)) {
+					rc = PTR_ERR(ctr);
+					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
+					goto release;
+				}
+				act->count = ctr;
+			}
+
+			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
+				/* can't happen */
+				rc = -EOPNOTSUPP;
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Deliver action violates action order (can't happen)");
+				goto release;
+			}
+			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
+			/* PF implies egdev is us, in which case we really
+			 * want to deliver to the uplink (because this is an
+			 * ingress filter).  If we don't recognise the egdev
+			 * at all, then we'd better trap so SW can handle it.
+			 */
+			if (IS_ERR(to_efv))
+				to_efv = EFX_EFV_PF;
+			if (to_efv == EFX_EFV_PF) {
+				if (uplinked)
+					break;
+				uplinked = true;
+			}
+			rc = efx_tc_flower_internal_mport(efx, to_efv);
+			if (rc < 0) {
+				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
+				goto release;
+			}
+			act->dest_mport = rc;
+			act->deliver = 1;
+			rc = efx_mae_alloc_action_set(efx, act);
+			if (rc) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Failed to write action set to hw (mirred)");
+				goto release;
+			}
+			list_add_tail(&act->list, &rule->acts.list);
+			act = NULL;
+			if (fa->id == FLOW_ACTION_REDIRECT)
+				break; /* end of the line */
+			/* Mirror, so continue on with saved act */
+			save.count = NULL;
+			act = kzalloc(sizeof(*act), GFP_USER);
+			if (!act) {
+				rc = -ENOMEM;
+				goto release;
+			}
+			*act = save;
+			break;
+		case FLOW_ACTION_TUNNEL_DECAP:
+			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
+				rc = -EINVAL;
+				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
+				goto release;
+			}
+			act->decap = 1;
+			/* If we previously delivered/trapped to uplink, now
+			 * that we've decapped we'll want another copy if we
+			 * try to deliver/trap to uplink again.
+			 */
+			uplinked = false;
+			break;
+		default:
+			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
+					       fa->id);
+			rc = -EOPNOTSUPP;
+			goto release;
+		}
+	}
+
+	if (act) {
+		if (!uplinked) {
+			/* Not shot/redirected, so deliver to default dest (which is
+			 * the uplink, as this is an ingress filter)
+			 */
+			efx_mae_mport_uplink(efx, &act->dest_mport);
+			act->deliver = 1;
+		}
+		rc = efx_mae_alloc_action_set(efx, act);
+		if (rc) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
+			goto release;
+		}
+		list_add_tail(&act->list, &rule->acts.list);
+		act = NULL; /* Prevent double-free in error path */
+	}
+
+	rule->match = match;
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "Successfully parsed foreign filter (cookie %lx)\n",
+		  tc->cookie);
+
+	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
+		goto release;
+	}
+	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
+				 rule->acts.fw_id, &rule->fw_id);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
+		goto release_act;
+	}
+	return 0;
+
+release_act:
+	efx_mae_free_action_set_list(efx, &rule->acts);
+release:
+	/* We failed to insert the rule, so free up any entries we created in
+	 * subsidiary tables.
+	 */
+	if (act)
+		efx_tc_free_action_set(efx, act, false);
+	if (rule) {
+		rhashtable_remove_fast(&efx->tc->match_action_ht,
+				       &rule->linkage,
+				       efx_tc_match_action_ht_params);
+		efx_tc_free_action_set_list(efx, &rule->acts, false);
+	}
+	kfree(rule);
+	if (match.encap)
+		efx_tc_flower_release_encap_match(efx, match.encap);
+	return rc;
+}
+
 static int efx_tc_flower_replace(struct efx_nic *efx,
 				 struct net_device *net_dev,
 				 struct flow_cls_offload *tc,
@@ -564,10 +895,8 @@  static int efx_tc_flower_replace(struct efx_nic *efx,
 
 	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
 	if (IS_ERR(from_efv)) {
-		/* Might be a tunnel decap rule from an indirect block.
-		 * Support for those not implemented yet.
-		 */
-		return -EOPNOTSUPP;
+		/* Not from our PF or representors, so probably a tunnel dev */
+		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
 	}
 
 	if (efv != from_efv) {
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index d70c0ba86669..47b6e9e35808 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -28,6 +28,7 @@  static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
 struct efx_tc_action_set {
 	u16 vlan_push:2;
 	u16 vlan_pop:2;
+	u16 decap:1;
 	u16 deliver:1;
 	__be16 vlan_tci[2]; /* TCIs for vlan_push */
 	__be16 vlan_proto[2]; /* Ethertypes for vlan_push */