diff mbox series

[net-next,v3,2/4] net/sched: add retpoline wrapper for tc

Message ID 20221205171520.1731689-3-pctammela@mojatatu.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net/sched: retpoline wrappers for tc | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/apply fail Patch does not apply to net-next

Commit Message

Pedro Tammela Dec. 5, 2022, 5:15 p.m. UTC
On kernels compiled with CONFIG_RETPOLINE and CONFIG_NET_TC_INDIRECT_WRAPPER,
optimize actions and filters that are compiled as built-ins into a direct call.
The calls are ordered according to relevance. Testing data shows that
the pps difference between first and last is between 0.5%-1.0%.

On subsequent patches we expose the classifiers and actions functions
and wire up the wrapper into tc.

Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
---
 include/net/tc_wrapper.h | 226 +++++++++++++++++++++++++++++++++++++++
 net/sched/Kconfig        |  13 +++
 2 files changed, 239 insertions(+)
 create mode 100644 include/net/tc_wrapper.h

Comments

Eric Dumazet Dec. 5, 2022, 5:23 p.m. UTC | #1
On Mon, Dec 5, 2022 at 6:16 PM Pedro Tammela <pctammela@mojatatu.com> wrote:
>
> On kernels compiled with CONFIG_RETPOLINE and CONFIG_NET_TC_INDIRECT_WRAPPER,
> optimize actions and filters that are compiled as built-ins into a direct call.
> The calls are ordered according to relevance. Testing data shows that
> the pps difference between first and last is between 0.5%-1.0%.
>
> On subsequent patches we expose the classifiers and actions functions
> and wire up the wrapper into tc.
>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Reviewed-by: Victor Nogueira <victor@mojatatu.com>
> ---
>  include/net/tc_wrapper.h | 226 +++++++++++++++++++++++++++++++++++++++
>  net/sched/Kconfig        |  13 +++
>  2 files changed, 239 insertions(+)
>  create mode 100644 include/net/tc_wrapper.h
>
> diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
> new file mode 100644
> index 000000000000..3bdebbfdf9d2
> --- /dev/null
> +++ b/include/net/tc_wrapper.h
> @@ -0,0 +1,226 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __NET_TC_WRAPPER_H
> +#define __NET_TC_WRAPPER_H
> +
> +#include <linux/indirect_call_wrapper.h>
> +#include <net/pkt_cls.h>
> +
> +#if IS_ENABLED(CONFIG_NET_TC_INDIRECT_WRAPPER)
> +
> +#define TC_INDIRECT_SCOPE
> +
> +/* TC Actions */
> +#ifdef CONFIG_NET_CLS_ACT
> +
> +#define TC_INDIRECT_ACTION_DECLARE(fname)                              \
> +       INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb,       \
> +                                           const struct tc_action *a, \
> +                                           struct tcf_result *res))
> +
> +TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
> +TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
> +
> +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
> +                          struct tcf_result *res)
> +{

Perhaps you could add a static key to enable this retpoline avoidance only
on cpus without hardware support.  (IBRS enabled cpus would basically
use a jump to
directly go to the

return a->ops->act(skb, a, res);
Pedro Tammela Dec. 5, 2022, 5:27 p.m. UTC | #2
On 05/12/2022 14:23, Eric Dumazet wrote:
> On Mon, Dec 5, 2022 at 6:16 PM Pedro Tammela <pctammela@mojatatu.com> wrote:
>>
>> On kernels compiled with CONFIG_RETPOLINE and CONFIG_NET_TC_INDIRECT_WRAPPER,
>> optimize actions and filters that are compiled as built-ins into a direct call.
>> The calls are ordered according to relevance. Testing data shows that
>> the pps difference between first and last is between 0.5%-1.0%.
>>
>> On subsequent patches we expose the classifiers and actions functions
>> and wire up the wrapper into tc.
>>
>> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
>> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> Reviewed-by: Victor Nogueira <victor@mojatatu.com>
>> ---
>>   include/net/tc_wrapper.h | 226 +++++++++++++++++++++++++++++++++++++++
>>   net/sched/Kconfig        |  13 +++
>>   2 files changed, 239 insertions(+)
>>   create mode 100644 include/net/tc_wrapper.h
>>
>> diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
>> new file mode 100644
>> index 000000000000..3bdebbfdf9d2
>> --- /dev/null
>> +++ b/include/net/tc_wrapper.h
>> @@ -0,0 +1,226 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __NET_TC_WRAPPER_H
>> +#define __NET_TC_WRAPPER_H
>> +
>> +#include <linux/indirect_call_wrapper.h>
>> +#include <net/pkt_cls.h>
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_INDIRECT_WRAPPER)
>> +
>> +#define TC_INDIRECT_SCOPE
>> +
>> +/* TC Actions */
>> +#ifdef CONFIG_NET_CLS_ACT
>> +
>> +#define TC_INDIRECT_ACTION_DECLARE(fname)                              \
>> +       INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb,       \
>> +                                           const struct tc_action *a, \
>> +                                           struct tcf_result *res))
>> +
>> +TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
>> +TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
>> +
>> +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
>> +                          struct tcf_result *res)
>> +{
> 
> Perhaps you could add a static key to enable this retpoline avoidance only
> on cpus without hardware support.  (IBRS enabled cpus would basically
> use a jump to
> directly go to the
> 
> return a->ops->act(skb, a, res);

Makes sense, and then we drop the Kconfig option?
Pedro Tammela Dec. 5, 2022, 6:45 p.m. UTC | #3
On 05/12/2022 14:23, Eric Dumazet wrote:
> On Mon, Dec 5, 2022 at 6:16 PM Pedro Tammela <pctammela@mojatatu.com> wrote:
>>
>> On kernels compiled with CONFIG_RETPOLINE and CONFIG_NET_TC_INDIRECT_WRAPPER,
>> optimize actions and filters that are compiled as built-ins into a direct call.
>> The calls are ordered according to relevance. Testing data shows that
>> the pps difference between first and last is between 0.5%-1.0%.
>>
>> On subsequent patches we expose the classifiers and actions functions
>> and wire up the wrapper into tc.
>>
>> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
>> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> Reviewed-by: Victor Nogueira <victor@mojatatu.com>
>> ---
>>   include/net/tc_wrapper.h | 226 +++++++++++++++++++++++++++++++++++++++
>>   net/sched/Kconfig        |  13 +++
>>   2 files changed, 239 insertions(+)
>>   create mode 100644 include/net/tc_wrapper.h
>>
>> diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
>> new file mode 100644
>> index 000000000000..3bdebbfdf9d2
>> --- /dev/null
>> +++ b/include/net/tc_wrapper.h
>> @@ -0,0 +1,226 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __NET_TC_WRAPPER_H
>> +#define __NET_TC_WRAPPER_H
>> +
>> +#include <linux/indirect_call_wrapper.h>
>> +#include <net/pkt_cls.h>
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_INDIRECT_WRAPPER)
>> +
>> +#define TC_INDIRECT_SCOPE
>> +
>> +/* TC Actions */
>> +#ifdef CONFIG_NET_CLS_ACT
>> +
>> +#define TC_INDIRECT_ACTION_DECLARE(fname)                              \
>> +       INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb,       \
>> +                                           const struct tc_action *a, \
>> +                                           struct tcf_result *res))
>> +
>> +TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
>> +TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
>> +TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
>> +
>> +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
>> +                          struct tcf_result *res)
>> +{
> 
> Perhaps you could add a static key to enable this retpoline avoidance only
> on cpus without hardware support.  (IBRS enabled cpus would basically
> use a jump to
> directly go to the
> 
> return a->ops->act(skb, a, res);

Something like this you have in mind? Not tested, just compiled:

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index 3bdebbfdf9d2..8a74bcf4a2e0 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -2,13 +2,19 @@
  #ifndef __NET_TC_WRAPPER_H
  #define __NET_TC_WRAPPER_H

-#include <linux/indirect_call_wrapper.h>
  #include <net/pkt_cls.h>

-#if IS_ENABLED(CONFIG_NET_TC_INDIRECT_WRAPPER)
+#if IS_ENABLED(CONFIG_RETPOLINE)
+
+#include <asm/cpufeature.h>
+
+#include <linux/static_key.h>
+#include <linux/indirect_call_wrapper.h>

  #define TC_INDIRECT_SCOPE

+static DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+
  /* TC Actions */
  #ifdef CONFIG_NET_CLS_ACT

@@ -41,6 +47,9 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
  static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
                            struct tcf_result *res)
  {
+       if (static_branch_unlikely(&tc_skip_wrapper))
+               goto skip;
+
  #if IS_BUILTIN(CONFIG_NET_ACT_GACT)
         if (a->ops->act == tcf_gact_act)
                 return tcf_gact_act(skb, a, res);
@@ -122,6 +131,7 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
                 return tcf_sample_act(skb, a, res);
  #endif

+skip:
         return a->ops->act(skb, a, res);
  }

@@ -151,6 +161,9 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify);
  static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                                 struct tcf_result *res)
  {
+       if (static_branch_unlikely(&tc_skip_wrapper))
+               goto skip;
+
  #if IS_BUILTIN(CONFIG_NET_CLS_BPF)
         if (tp->classify == cls_bpf_classify)
                 return cls_bpf_classify(skb, tp, res);
@@ -200,9 +213,16 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                 return tcindex_classify(skb, tp, res);
  #endif

+skip:
         return tp->classify(skb, tp, res);
  }

+static inline void tc_wrapper_init(void)
+{
+       if (boot_cpu_has(X86_FEATURE_IBRS))
+               static_branch_enable(&tc_skip_wrapper);
+}
+
  #endif /* CONFIG_NET_CLS */

  #else
@@ -221,6 +241,10 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
         return tp->classify(skb, tp, res);
  }

+static inline void tc_wrapper_init(void)
+{
+}
+
  #endif

  #endif /* __NET_TC_WRAPPER_H */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9bc055f8013e..1e8ab4749c6c 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1021,19 +1021,6 @@ config NET_TC_SKB_EXT

           Say N here if you won't be using tc<->ovs offload or tc chains offload.

-config NET_TC_INDIRECT_WRAPPER
-       bool "TC indirect call wrapper"
-       depends on NET_SCHED
-       depends on RETPOLINE
-
-       help
-         Say Y here to skip indirect calls in the TC datapath for known
-         builtin classifiers/actions under CONFIG_RETPOLINE kernels.
-
-         TC may run slower on CPUs with hardware based mitigations.
-
-         If unsure, say N.
-
  endif # NET_SCHED

  config NET_SCH_FIFO
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5b3c0ac495be..44d4b1e4e18e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -2179,6 +2179,8 @@ static int __init tc_action_init(void)
         rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
                       0);

+       tc_wrapper_init();
+
         return 0;
  }

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 668130f08903..39b6f6331dee 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3765,6 +3765,8 @@ static int __init tc_filter_init(void)
         rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
                       tc_dump_chain, 0);

+       tc_wrapper_init();
+
         return 0;

  err_register_pernet_subsys:
diff mbox series

Patch

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
new file mode 100644
index 000000000000..3bdebbfdf9d2
--- /dev/null
+++ b/include/net/tc_wrapper.h
@@ -0,0 +1,226 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_WRAPPER_H
+#define __NET_TC_WRAPPER_H
+
+#include <linux/indirect_call_wrapper.h>
+#include <net/pkt_cls.h>
+
+#if IS_ENABLED(CONFIG_NET_TC_INDIRECT_WRAPPER)
+
+#define TC_INDIRECT_SCOPE
+
+/* TC Actions */
+#ifdef CONFIG_NET_CLS_ACT
+
+#define TC_INDIRECT_ACTION_DECLARE(fname)                              \
+	INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb,       \
+					    const struct tc_action *a, \
+					    struct tcf_result *res))
+
+TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
+TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+			   struct tcf_result *res)
+{
+#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
+	if (a->ops->act == tcf_gact_act)
+		return tcf_gact_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED)
+	if (a->ops->act == tcf_mirred_act)
+		return tcf_mirred_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
+	if (a->ops->act == tcf_pedit_act)
+		return tcf_pedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT)
+	if (a->ops->act == tcf_skbedit_act)
+		return tcf_skbedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD)
+	if (a->ops->act == tcf_skbmod_act)
+		return tcf_skbmod_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_POLICE)
+	if (a->ops->act == tcf_police_act)
+		return tcf_police_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_BPF)
+	if (a->ops->act == tcf_bpf_act)
+		return tcf_bpf_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK)
+	if (a->ops->act == tcf_connmark_act)
+		return tcf_connmark_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CSUM)
+	if (a->ops->act == tcf_csum_act)
+		return tcf_csum_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CT)
+	if (a->ops->act == tcf_ct_act)
+		return tcf_ct_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO)
+	if (a->ops->act == tcf_ctinfo_act)
+		return tcf_ctinfo_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_GATE)
+	if (a->ops->act == tcf_gate_act)
+		return tcf_gate_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MPLS)
+	if (a->ops->act == tcf_mpls_act)
+		return tcf_mpls_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_NAT)
+	if (a->ops->act == tcf_nat_act)
+		return tcf_nat_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY)
+	if (a->ops->act == tunnel_key_act)
+		return tunnel_key_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_VLAN)
+	if (a->ops->act == tcf_vlan_act)
+		return tcf_vlan_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IFE)
+	if (a->ops->act == tcf_ife_act)
+		return tcf_ife_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
+	if (a->ops->act == tcf_ipt_act)
+		return tcf_ipt_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
+	if (a->ops->act == tcf_simp_act)
+		return tcf_simp_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE)
+	if (a->ops->act == tcf_sample_act)
+		return tcf_sample_act(skb, a, res);
+#endif
+
+	return a->ops->act(skb, a, res);
+}
+
+#endif /* CONFIG_NET_CLS_ACT */
+
+/* TC Filters */
+#ifdef CONFIG_NET_CLS
+
+#define TC_INDIRECT_FILTER_DECLARE(fname)                               \
+	INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb,        \
+					    const struct tcf_proto *tp, \
+					    struct tcf_result *res))
+
+TC_INDIRECT_FILTER_DECLARE(basic_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify);
+TC_INDIRECT_FILTER_DECLARE(fl_classify);
+TC_INDIRECT_FILTER_DECLARE(flow_classify);
+TC_INDIRECT_FILTER_DECLARE(fw_classify);
+TC_INDIRECT_FILTER_DECLARE(mall_classify);
+TC_INDIRECT_FILTER_DECLARE(route4_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
+TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
+TC_INDIRECT_FILTER_DECLARE(u32_classify);
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+				struct tcf_result *res)
+{
+#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
+	if (tp->classify == cls_bpf_classify)
+		return cls_bpf_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_U32)
+	if (tp->classify == u32_classify)
+		return u32_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER)
+	if (tp->classify == fl_classify)
+		return fl_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FW)
+	if (tp->classify == fw_classify)
+		return fw_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL)
+	if (tp->classify == mall_classify)
+		return mall_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_BASIC)
+	if (tp->classify == basic_classify)
+		return basic_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+	if (tp->classify == cls_cgroup_classify)
+		return cls_cgroup_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOW)
+	if (tp->classify == flow_classify)
+		return flow_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4)
+	if (tp->classify == route4_classify)
+		return route4_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
+	if (tp->classify == rsvp_classify)
+		return rsvp_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
+	if (tp->classify == rsvp6_classify)
+		return rsvp6_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
+	if (tp->classify == tcindex_classify)
+		return tcindex_classify(skb, tp, res);
+#endif
+
+	return tp->classify(skb, tp, res);
+}
+
+#endif /* CONFIG_NET_CLS */
+
+#else
+
+#define TC_INDIRECT_SCOPE static
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+			   struct tcf_result *res)
+{
+	return a->ops->act(skb, a, res);
+}
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+				struct tcf_result *res)
+{
+	return tp->classify(skb, tp, res);
+}
+
+#endif
+
+#endif /* __NET_TC_WRAPPER_H */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1e8ab4749c6c..9bc055f8013e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1021,6 +1021,19 @@  config NET_TC_SKB_EXT
 
 	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
 
+config NET_TC_INDIRECT_WRAPPER
+	bool "TC indirect call wrapper"
+	depends on NET_SCHED
+	depends on RETPOLINE
+
+	help
+	  Say Y here to skip indirect calls in the TC datapath for known
+	  builtin classifiers/actions under CONFIG_RETPOLINE kernels.
+
+	  TC may run slower on CPUs with hardware based mitigations.
+
+	  If unsure, say N.
+
 endif # NET_SCHED
 
 config NET_SCH_FIFO