diff mbox series

[net,v3,1/2] net/sched: act_mirred: use the backlog for mirred ingress

Message ID 20240215143346.1715054-1-kuba@kernel.org (mailing list archive)
State Accepted
Commit 52f671db18823089a02f07efc04efdb2272ddc17
Delegated to: Netdev Maintainers
Headers show
Series [net,v3,1/2] net/sched: act_mirred: use the backlog for mirred ingress | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 976 this patch: 976
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: shuah@kernel.org linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 993 this patch: 993
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success net selftest script(s) already in Makefile
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 993 this patch: 993
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 41 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-02-16--06-00 (tests: 1443)

Commit Message

Jakub Kicinski Feb. 15, 2024, 2:33 p.m. UTC
The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
for nested calls to mirred ingress") hangs our testing VMs every 10 or so
runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
lockdep.

The problem as previously described by Davide (see Link) is that
if we reverse flow of traffic with the redirect (egress -> ingress)
we may reach the same socket which generated the packet. And we may
still be holding its socket lock. The common solution to such deadlocks
is to put the packet in the Rx backlog, rather than run the Rx path
inline. Do that for all egress -> ingress reversals, not just once
we started to nest mirred calls.

In the past there was a concern that the backlog indirection will
lead to loss of error reporting / less accurate stats. But the current
workaround does not seem to address the issue.

Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions")
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Suggested-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
CC: jhs@mojatatu.com
CC: xiyou.wangcong@gmail.com
CC: jiri@resnulli.us
CC: shmulik.ladkani@gmail.com
---
 net/sched/act_mirred.c                             | 14 +++++---------
 .../testing/selftests/net/forwarding/tc_actions.sh |  3 ---
 2 files changed, 5 insertions(+), 12 deletions(-)

Comments

Jamal Hadi Salim Feb. 15, 2024, 5:33 p.m. UTC | #1
On Thu, Feb 15, 2024 at 9:33 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
> for nested calls to mirred ingress") hangs our testing VMs every 10 or so
> runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
> lockdep.
>
> The problem as previously described by Davide (see Link) is that
> if we reverse flow of traffic with the redirect (egress -> ingress)
> we may reach the same socket which generated the packet. And we may
> still be holding its socket lock. The common solution to such deadlocks
> is to put the packet in the Rx backlog, rather than run the Rx path
> inline. Do that for all egress -> ingress reversals, not just once
> we started to nest mirred calls.
>
> In the past there was a concern that the backlog indirection will
> lead to loss of error reporting / less accurate stats. But the current
> workaround does not seem to address the issue.
>
> Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions")
> Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Suggested-by: Davide Caratti <dcaratti@redhat.com>
> Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

cheers,
jamal

> ---
> CC: jhs@mojatatu.com
> CC: xiyou.wangcong@gmail.com
> CC: jiri@resnulli.us
> CC: shmulik.ladkani@gmail.com
> ---
>  net/sched/act_mirred.c                             | 14 +++++---------
>  .../testing/selftests/net/forwarding/tc_actions.sh |  3 ---
>  2 files changed, 5 insertions(+), 12 deletions(-)
>
> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
> index 0a1a9e40f237..291d47c9eb69 100644
> --- a/net/sched/act_mirred.c
> +++ b/net/sched/act_mirred.c
> @@ -232,18 +232,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
>         return err;
>  }
>
> -static bool is_mirred_nested(void)
> -{
> -       return unlikely(__this_cpu_read(mirred_nest_level) > 1);
> -}
> -
> -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
> +static int
> +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
>  {
>         int err;
>
>         if (!want_ingress)
>                 err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
> -       else if (is_mirred_nested())
> +       else if (!at_ingress)
>                 err = netif_rx(skb);
>         else
>                 err = netif_receive_skb(skb);
> @@ -319,9 +315,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
>
>                 skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
>
> -               err = tcf_mirred_forward(want_ingress, skb_to_send);
> +               err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
>         } else {
> -               err = tcf_mirred_forward(want_ingress, skb_to_send);
> +               err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
>         }
>
>         if (err) {
> diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
> index b0f5e55d2d0b..589629636502 100755
> --- a/tools/testing/selftests/net/forwarding/tc_actions.sh
> +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
> @@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test()
>         check_err $? "didn't mirred redirect ICMP"
>         tc_check_packets "dev $h1 ingress" 102 10
>         check_err $? "didn't drop mirred ICMP"
> -       local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
> -       test ${overlimits} = 10
> -       check_err $? "wrong overlimits, expected 10 got ${overlimits}"
>
>         tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
>         tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
> --
> 2.43.0
>
patchwork-bot+netdevbpf@kernel.org Feb. 16, 2024, 10:20 a.m. UTC | #2
Hello:

This series was applied to netdev/net.git (main)
by David S. Miller <davem@davemloft.net>:

On Thu, 15 Feb 2024 06:33:45 -0800 you wrote:
> The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
> for nested calls to mirred ingress") hangs our testing VMs every 10 or so
> runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
> lockdep.
> 
> The problem as previously described by Davide (see Link) is that
> if we reverse flow of traffic with the redirect (egress -> ingress)
> we may reach the same socket which generated the packet. And we may
> still be holding its socket lock. The common solution to such deadlocks
> is to put the packet in the Rx backlog, rather than run the Rx path
> inline. Do that for all egress -> ingress reversals, not just once
> we started to nest mirred calls.
> 
> [...]

Here is the summary with links:
  - [net,v3,1/2] net/sched: act_mirred: use the backlog for mirred ingress
    https://git.kernel.org/netdev/net/c/52f671db1882
  - [net,v3,2/2] net/sched: act_mirred: don't override retval if we already lost the skb
    https://git.kernel.org/netdev/net/c/166c2c8a6a4d

You are awesome, thank you!
diff mbox series

Patch

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a1a9e40f237..291d47c9eb69 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -232,18 +232,14 @@  static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	return err;
 }
 
-static bool is_mirred_nested(void)
-{
-	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
 {
 	int err;
 
 	if (!want_ingress)
 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
-	else if (is_mirred_nested())
+	else if (!at_ingress)
 		err = netif_rx(skb);
 	else
 		err = netif_receive_skb(skb);
@@ -319,9 +315,9 @@  static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
 
 		skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
 
-		err = tcf_mirred_forward(want_ingress, skb_to_send);
+		err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
 	} else {
-		err = tcf_mirred_forward(want_ingress, skb_to_send);
+		err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
 	}
 
 	if (err) {
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index b0f5e55d2d0b..589629636502 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -235,9 +235,6 @@  mirred_egress_to_ingress_tcp_test()
 	check_err $? "didn't mirred redirect ICMP"
 	tc_check_packets "dev $h1 ingress" 102 10
 	check_err $? "didn't drop mirred ICMP"
-	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
-	test ${overlimits} = 10
-	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
 
 	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
 	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower