diff mbox series

[RFC,net-next] net/sched: act_mirred: allow mirred ingress through networking backlog

Message ID 0b153a5ab818dff51110f81550a4050538605a4b.1667252314.git.dcaratti@redhat.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series [RFC,net-next] net/sched: act_mirred: allow mirred ingress through networking backlog | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 12 this patch: 12
netdev/cc_maintainers warning 3 maintainers not CCed: kuba@kernel.org edumazet@google.com davem@davemloft.net
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 12 this patch: 12
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Davide Caratti Oct. 31, 2022, 9:44 p.m. UTC
using TC mirrred in the ingress direction, packets are passed directly
to the receiver in the same context. There are a couple of reasons that
justify the proposal to use kernel networking backlog instead:

a) avoid the soft lockup observed with TCP when it sends data+ack after
   receiving packets through mirred (William sees them using OVS,
   something similar can be obtained with a kselftest [1)]
b) avoid packet drops caused by mirred hitting MIRRED_RECURSION_LIMIT
   in the above scenario

however, like Cong pointed out [2], we can't just change mirred redirect to
use the networking backlog: this would break users expectation, because it
would be impossible to know the RX status after a packet has been enqueued
to the backlog.

A possible approach can be to extend the current set of TC mirred "eaction",
so that the application can choose to use the backlog instead of the classic
ingress redirect. This would also ease future decisions of performing a
complete scrub of the skb metadata for those packets, without changing the
behavior of existing TC ingress redirect rules.

Any feedback appreciated, thanks!

[1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
[2] https://lore.kernel.org/netdev/YzCZMHYmk53mQ+HK@pop-os.localdomain/

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 include/net/tc_act/tc_mirred.h        |  3 ++-
 include/uapi/linux/tc_act/tc_mirred.h |  1 +
 net/sched/act_mirred.c                | 29 +++++++++++++++++++++------
 3 files changed, 26 insertions(+), 7 deletions(-)

Comments

Jamal Hadi Salim Nov. 2, 2022, 3:34 p.m. UTC | #1
This version looks sane to me. Cong?

cheers,
jamal

On Mon, Oct 31, 2022 at 5:44 PM Davide Caratti <dcaratti@redhat.com> wrote:
>
> using TC mirrred in the ingress direction, packets are passed directly
> to the receiver in the same context. There are a couple of reasons that
> justify the proposal to use kernel networking backlog instead:
>
> a) avoid the soft lockup observed with TCP when it sends data+ack after
>    receiving packets through mirred (William sees them using OVS,
>    something similar can be obtained with a kselftest [1)]
> b) avoid packet drops caused by mirred hitting MIRRED_RECURSION_LIMIT
>    in the above scenario
>
> however, like Cong pointed out [2], we can't just change mirred redirect to
> use the networking backlog: this would break users expectation, because it
> would be impossible to know the RX status after a packet has been enqueued
> to the backlog.
>
> A possible approach can be to extend the current set of TC mirred "eaction",
> so that the application can choose to use the backlog instead of the classic
> ingress redirect. This would also ease future decisions of performing a
> complete scrub of the skb metadata for those packets, without changing the
> behavior of existing TC ingress redirect rules.
>
> Any feedback appreciated, thanks!
>
> [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
> [2] https://lore.kernel.org/netdev/YzCZMHYmk53mQ+HK@pop-os.localdomain/
>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> ---
>  include/net/tc_act/tc_mirred.h        |  3 ++-
>  include/uapi/linux/tc_act/tc_mirred.h |  1 +
>  net/sched/act_mirred.c                | 29 +++++++++++++++++++++------
>  3 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
> index 32ce8ea36950..9e10ad1adb57 100644
> --- a/include/net/tc_act/tc_mirred.h
> +++ b/include/net/tc_act/tc_mirred.h
> @@ -37,7 +37,8 @@ static inline bool is_tcf_mirred_ingress_redirect(const struct tc_action *a)
>  {
>  #ifdef CONFIG_NET_CLS_ACT
>         if (a->ops && a->ops->id == TCA_ID_MIRRED)
> -               return to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR;
> +               return (to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR ||
> +                       to_mirred(a)->tcfm_eaction == TCA_INGRESS_BACKLOG);
>  #endif
>         return false;
>  }
> diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
> index 2500a0005d05..e5939a3c9d86 100644
> --- a/include/uapi/linux/tc_act/tc_mirred.h
> +++ b/include/uapi/linux/tc_act/tc_mirred.h
> @@ -9,6 +9,7 @@
>  #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
>  #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
>  #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
> +#define TCA_INGRESS_BACKLOG 5 /* packet redirect to INGRESS (using Linux backlog) */
>
>  struct tc_mirred {
>         tc_gen;
> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
> index b8ad6ae282c0..9526bc0ee3d2 100644
> --- a/net/sched/act_mirred.c
> +++ b/net/sched/act_mirred.c
> @@ -33,7 +33,13 @@ static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
>
>  static bool tcf_mirred_is_act_redirect(int action)
>  {
> -       return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
> +       switch (action) {
> +       case TCA_EGRESS_REDIR:
> +       case TCA_INGRESS_REDIR:
> +       case TCA_INGRESS_BACKLOG:
> +               return true;
> +       }
> +       return false;
>  }
>
>  static bool tcf_mirred_act_wants_ingress(int action)
> @@ -44,6 +50,7 @@ static bool tcf_mirred_act_wants_ingress(int action)
>                 return false;
>         case TCA_INGRESS_REDIR:
>         case TCA_INGRESS_MIRROR:
> +       case TCA_INGRESS_BACKLOG:
>                 return true;
>         default:
>                 BUG();
> @@ -130,6 +137,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
>         case TCA_EGRESS_REDIR:
>         case TCA_INGRESS_REDIR:
>         case TCA_INGRESS_MIRROR:
> +       case TCA_INGRESS_BACKLOG:
>                 break;
>         default:
>                 if (exists)
> @@ -205,14 +213,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
>         return err;
>  }
>
> -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
> +static int tcf_mirred_forward(int eaction, struct sk_buff *skb)
>  {
>         int err;
>
> -       if (!want_ingress)
> +       switch (eaction) {
> +       case TCA_EGRESS_MIRROR:
> +       case TCA_EGRESS_REDIR:
>                 err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
> -       else
> +               break;
> +       case TCA_INGRESS_MIRROR:
> +       case TCA_INGRESS_REDIR:
>                 err = netif_receive_skb(skb);
> +               break;
> +       case TCA_INGRESS_BACKLOG:
> +               err = netif_rx(skb);
> +               break;
> +       }
>
>         return err;
>  }
> @@ -305,7 +322,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
>
>                 /* let's the caller reinsert the packet, if possible */
>                 if (use_reinsert) {
> -                       err = tcf_mirred_forward(want_ingress, skb);
> +                       err = tcf_mirred_forward(m_eaction, skb);
>                         if (err)
>                                 tcf_action_inc_overlimit_qstats(&m->common);
>                         __this_cpu_dec(mirred_rec_level);
> @@ -313,7 +330,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
>                 }
>         }
>
> -       err = tcf_mirred_forward(want_ingress, skb2);
> +       err = tcf_mirred_forward(m_eaction, skb2);
>         if (err) {
>  out:
>                 tcf_action_inc_overlimit_qstats(&m->common);
> --
> 2.37.3
>
Cong Wang Nov. 3, 2022, 7:12 p.m. UTC | #2
On Mon, Oct 31, 2022 at 10:44:26PM +0100, Davide Caratti wrote:
> using TC mirrred in the ingress direction, packets are passed directly
> to the receiver in the same context. There are a couple of reasons that
> justify the proposal to use kernel networking backlog instead:
> 
> a) avoid the soft lockup observed with TCP when it sends data+ack after
>    receiving packets through mirred (William sees them using OVS,
>    something similar can be obtained with a kselftest [1)]

Do you have a pointer to the original bug report? The test case you
crafted looks unreal to me, a TCP socket sending packets to itself does
not look real.


> b) avoid packet drops caused by mirred hitting MIRRED_RECURSION_LIMIT
>    in the above scenario
> 
> however, like Cong pointed out [2], we can't just change mirred redirect to
> use the networking backlog: this would break users expectation, because it
> would be impossible to know the RX status after a packet has been enqueued
> to the backlog.
> 
> A possible approach can be to extend the current set of TC mirred "eaction",
> so that the application can choose to use the backlog instead of the classic
> ingress redirect. This would also ease future decisions of performing a
> complete scrub of the skb metadata for those packets, without changing the
> behavior of existing TC ingress redirect rules.
> 

From users' point of view, why do we need to care about this
implemention detail when picking TC mirred action?


Thanks.
diff mbox series

Patch

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 32ce8ea36950..9e10ad1adb57 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -37,7 +37,8 @@  static inline bool is_tcf_mirred_ingress_redirect(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (a->ops && a->ops->id == TCA_ID_MIRRED)
-		return to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR;
+		return (to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR ||
+			to_mirred(a)->tcfm_eaction == TCA_INGRESS_BACKLOG);
 #endif
 	return false;
 }
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 2500a0005d05..e5939a3c9d86 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -9,6 +9,7 @@ 
 #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
 #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
+#define TCA_INGRESS_BACKLOG 5 /* packet redirect to INGRESS (using Linux backlog) */
 
 struct tc_mirred {
 	tc_gen;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b8ad6ae282c0..9526bc0ee3d2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -33,7 +33,13 @@  static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
 
 static bool tcf_mirred_is_act_redirect(int action)
 {
-	return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
+	switch (action) {
+	case TCA_EGRESS_REDIR:
+	case TCA_INGRESS_REDIR:
+	case TCA_INGRESS_BACKLOG:
+		return true;
+	}
+	return false;
 }
 
 static bool tcf_mirred_act_wants_ingress(int action)
@@ -44,6 +50,7 @@  static bool tcf_mirred_act_wants_ingress(int action)
 		return false;
 	case TCA_INGRESS_REDIR:
 	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_BACKLOG:
 		return true;
 	default:
 		BUG();
@@ -130,6 +137,7 @@  static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	case TCA_EGRESS_REDIR:
 	case TCA_INGRESS_REDIR:
 	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_BACKLOG:
 		break;
 	default:
 		if (exists)
@@ -205,14 +213,23 @@  static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	return err;
 }
 
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int tcf_mirred_forward(int eaction, struct sk_buff *skb)
 {
 	int err;
 
-	if (!want_ingress)
+	switch (eaction) {
+	case TCA_EGRESS_MIRROR:
+	case TCA_EGRESS_REDIR:
 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
-	else
+		break;
+	case TCA_INGRESS_MIRROR:
+	case TCA_INGRESS_REDIR:
 		err = netif_receive_skb(skb);
+		break;
+	case TCA_INGRESS_BACKLOG:
+		err = netif_rx(skb);
+		break;
+	}
 
 	return err;
 }
@@ -305,7 +322,7 @@  static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 
 		/* let's the caller reinsert the packet, if possible */
 		if (use_reinsert) {
-			err = tcf_mirred_forward(want_ingress, skb);
+			err = tcf_mirred_forward(m_eaction, skb);
 			if (err)
 				tcf_action_inc_overlimit_qstats(&m->common);
 			__this_cpu_dec(mirred_rec_level);
@@ -313,7 +330,7 @@  static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 		}
 	}
 
-	err = tcf_mirred_forward(want_ingress, skb2);
+	err = tcf_mirred_forward(m_eaction, skb2);
 	if (err) {
 out:
 		tcf_action_inc_overlimit_qstats(&m->common);