diff mbox

teamd: add port_master_ifindex_changed for teamd_event_watch_ops

Message ID 1531965888-28201-1-git-send-email-liuhangbin@gmail.com (mailing list archive)
State New
Headers show

Commit Message

Hangbin Liu July 19, 2018, 2:04 a.m. UTC
When multi interfaces up, there is a possibility that the link states
notification came later after interface up, which cause the master_ifindex
not update timely. Then the teamd_port_present() checking in
ab_link_watch_handler() -> teamd_for_each_tdport() -> teamd_get_next_tdport()
will failed and we will not set the active port.

Fix it by adding a new teamd_event_watch_ops port_master_ifindex_changed for
ab mode.

Reproducer:

\#/bin/bash
WAIT=2
COUNT=0

start_team()
{
        num=$1
        teamd -o -n -U -d -t team$num -c '{"runner": {"name": "activebackup"},"link_watch": {"name": "ethtool"}}' -gg
        teamdctl team$num port add eth$num
}

while :; do
        echo "-----------------------------------------------------------"
        let "COUNT++"
        echo "Loop $COUNT"
        if teamdctl team1 state | grep -q "active port: eth1" && \
                teamdctl team2 state | grep -q "active port: eth2"; then
                echo "Pass"
        else
                echo "FAIL"
                exit 1
        fi

        teamd -k -t team1
        teamd -k -t team2
        sleep "$WAIT"
        start_team 1 &
        start_team 2 &
        sleep "$WAIT"
done

Result: Usually in 10 rounds, we will get the failure,
\# teamdctl team1 state
setup:
  runner: activebackup
ports:
  eth1
    link watches:
      link summary: up
      instance[link_watch_0]:
        name: ethtool
        link: up
        down count: 0
runner:
  active port:

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

---
 teamd/teamd.h                     |  5 +++++
 teamd/teamd_events.c              | 19 +++++++++++++++++++
 teamd/teamd_ifinfo_watch.c        |  5 +++++
 teamd/teamd_runner_activebackup.c |  8 ++++++++
 4 files changed, 37 insertions(+)

-- 
2.5.5

Comments

Jiri Pirko July 31, 2018, 10:14 a.m. UTC | #1
Thu, Jul 19, 2018 at 04:04:48AM CEST, liuhangbin@gmail.com wrote:
>When multi interfaces up, there is a possibility that the link states
>notification came later after interface up, which cause the master_ifindex
>not update timely. Then the teamd_port_present() checking in
>ab_link_watch_handler() -> teamd_for_each_tdport() -> teamd_get_next_tdport()
>will failed and we will not set the active port.

I don't understand this para. Could you please re-phrase? It would help
me understand what is this about. Thanks!


>
>Fix it by adding a new teamd_event_watch_ops port_master_ifindex_changed for
>ab mode.

Other modes are not affected?
Jamie Bainbridge July 31, 2018, 11:34 p.m. UTC | #2
On 31 July 2018 at 20:14, Jiri Pirko <jiri@resnulli.us> wrote:
> Thu, Jul 19, 2018 at 04:04:48AM CEST, liuhangbin@gmail.com wrote:
>>When multi interfaces up, there is a possibility that the link states
>>notification came later after interface up, which cause the master_ifindex
>>not update timely. Then the teamd_port_present() checking in
>>ab_link_watch_handler() -> teamd_for_each_tdport() -> teamd_get_next_tdport()
>>will failed and we will not set the active port.
>
> I don't understand this para. Could you please re-phrase? It would help
> me understand what is this about. Thanks!

If you configure two active-backup teams, one slave each, put them
down and up in a loop and test traffic each loop, eventually one of
the teams will not accept/transmit traffic as there is no active port.
Like this:

#!/bin/bash
WAIT=2
COUNT=0
while :; do
    echo "-----------------------------------------------------------"
    let "COUNT++"
    echo "Loop $COUNT"
    for ADDR in "10.0.1.1" "10.0.2.1"; do
        if ! ping -q -W1 -c1 "$ADDR" >/dev/null 2>&1; then
            echo "FAIL on $ADDR"
            echo
            for TEAM in team1 team2; do teamdctl "$TEAM" state dump |
egrep "active_port|\"pid\""; done
            exit 1
        else
            echo "Pass on $ADDR"
        fi
    done
    nmcli con down team1 &
    nmcli con down team2 &
    sleep "$WAIT"
    nmcli con up   team1 &
    nmcli con up   team2 &
    sleep "$WAIT"
done

Failure as follows:

# teamdctl teamX state dump
    "runner": {
        "active_port": ""
    },

This happens because teamd_port_present() fails in the codepath
Hangbin described above, so the teamd_for_each_tdport() for loop can
never run against the tdports bound to the ctx and a port is never set
active. The tdport is on the ctx port_obj_list at the time.

>>Fix it by adding a new teamd_event_watch_ops port_master_ifindex_changed for
>>ab mode.
>
> Other modes are not affected?

I could not reproduce with broadcast or roundrobin runners.

Jamie
Jiri Pirko Aug. 2, 2018, 3:42 p.m. UTC | #3
Wed, Aug 01, 2018 at 01:34:59AM CEST, jamie.bainbridge@gmail.com wrote:
>On 31 July 2018 at 20:14, Jiri Pirko <jiri@resnulli.us> wrote:
>> Thu, Jul 19, 2018 at 04:04:48AM CEST, liuhangbin@gmail.com wrote:
>>>When multi interfaces up, there is a possibility that the link states
>>>notification came later after interface up, which cause the master_ifindex
>>>not update timely. Then the teamd_port_present() checking in
>>>ab_link_watch_handler() -> teamd_for_each_tdport() -> teamd_get_next_tdport()
>>>will failed and we will not set the active port.
>>
>> I don't understand this para. Could you please re-phrase? It would help
>> me understand what is this about. Thanks!
>
>If you configure two active-backup teams, one slave each, put them
>down and up in a loop and test traffic each loop, eventually one of
>the teams will not accept/transmit traffic as there is no active port.

Could you please update the patch description?
diff mbox

Patch

diff --git a/teamd/teamd.h b/teamd/teamd.h
index 5dbfb9b..3934fc2 100644
--- a/teamd/teamd.h
+++ b/teamd/teamd.h
@@ -189,6 +189,9 @@  struct teamd_event_watch_ops {
 				   struct teamd_port *tdport, void *priv);
 	int (*port_ifname_changed)(struct teamd_context *ctx,
 				   struct teamd_port *tdport, void *priv);
+	int (*port_master_ifindex_changed)(struct teamd_context *ctx,
+					   struct teamd_port *tdport,
+					   void *priv);
 	int (*option_changed)(struct teamd_context *ctx,
 			      struct team_option *option, void *priv);
 	char *option_changed_match_name;
@@ -208,6 +211,8 @@  int teamd_event_ifinfo_hwaddr_changed(struct teamd_context *ctx,
 				      struct team_ifinfo *ifinfo);
 int teamd_event_ifinfo_ifname_changed(struct teamd_context *ctx,
 				      struct team_ifinfo *ifinfo);
+int teamd_event_ifinfo_master_ifindex_changed(struct teamd_context *ctx,
+					      struct team_ifinfo *ifinfo);
 int teamd_event_ifinfo_admin_state_changed(struct teamd_context *ctx,
 					   struct team_ifinfo *ifinfo);
 int teamd_events_init(struct teamd_context *ctx);
diff --git a/teamd/teamd_events.c b/teamd/teamd_events.c
index 1a95974..65aa46a 100644
--- a/teamd/teamd_events.c
+++ b/teamd/teamd_events.c
@@ -167,6 +167,25 @@  int teamd_event_ifinfo_ifname_changed(struct teamd_context *ctx,
 	return 0;
 }
 
+int teamd_event_ifinfo_master_ifindex_changed(struct teamd_context *ctx,
+					      struct team_ifinfo *ifinfo)
+{
+	struct event_watch_item *watch;
+	uint32_t ifindex = team_get_ifinfo_ifindex(ifinfo);
+	struct teamd_port *tdport = teamd_get_port(ctx, ifindex);
+	int err;
+
+	list_for_each_node_entry(watch, &ctx->event_watch_list, list) {
+		if (watch->ops->port_master_ifindex_changed && tdport) {
+			err = watch->ops->port_master_ifindex_changed(ctx, tdport,
+								      watch->priv);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
 int teamd_event_ifinfo_admin_state_changed(struct teamd_context *ctx,
 					   struct team_ifinfo *ifinfo)
 {
diff --git a/teamd/teamd_ifinfo_watch.c b/teamd/teamd_ifinfo_watch.c
index f334ff6..6a19532 100644
--- a/teamd/teamd_ifinfo_watch.c
+++ b/teamd/teamd_ifinfo_watch.c
@@ -59,6 +59,11 @@  static int ifinfo_change_handler_func(struct team_handle *th, void *priv,
 			if (err)
 				return err;
 		}
+		if (team_is_ifinfo_master_ifindex_changed(ifinfo)) {
+			err = teamd_event_ifinfo_master_ifindex_changed(ctx, ifinfo);
+			if (err)
+				return err;
+		}
 	}
 	return 0;
 }
diff --git a/teamd/teamd_runner_activebackup.c b/teamd/teamd_runner_activebackup.c
index 8a3447f..f92d341 100644
--- a/teamd/teamd_runner_activebackup.c
+++ b/teamd/teamd_runner_activebackup.c
@@ -520,6 +520,13 @@  static int ab_event_watch_port_link_changed(struct teamd_context *ctx,
 	return ab_link_watch_handler(ctx, priv);
 }
 
+static int ab_event_watch_port_master_ifindex_changed(struct teamd_context *ctx,
+						      struct teamd_port *tdport,
+						      void *priv)
+{
+	return ab_link_watch_handler(ctx, priv);
+}
+
 static int ab_event_watch_prio_option_changed(struct teamd_context *ctx,
 					      struct team_option *option,
 					      void *priv)
@@ -532,6 +539,7 @@  static const struct teamd_event_watch_ops ab_event_watch_ops = {
 	.port_hwaddr_changed = ab_event_watch_port_hwaddr_changed,
 	.port_added = ab_event_watch_port_added,
 	.port_link_changed = ab_event_watch_port_link_changed,
+	.port_master_ifindex_changed = ab_event_watch_port_master_ifindex_changed,
 	.option_changed = ab_event_watch_prio_option_changed,
 	.option_changed_match_name = "priority",
 };