diff mbox series

netdev: set a timeout for CMD_CONNECT based connections

Message ID 20241105131059.1290437-1-prestwoj@gmail.com (mailing list archive)
State New
Headers show
Series netdev: set a timeout for CMD_CONNECT based connections | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-alpine-ci-fetch success Fetch PR
prestwoj/iwd-ci-gitlint success GitLint
prestwoj/iwd-alpine-ci-setupell success Prep - Setup ELL
prestwoj/iwd-ci-fetch success Fetch PR
prestwoj/iwd-ci-setupell success Prep - Setup ELL
prestwoj/iwd-alpine-ci-makedistcheck success Make Distcheck
prestwoj/iwd-alpine-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-ci-makedistcheck success Make Distcheck
prestwoj/iwd-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-ci-build success Build - Configure
prestwoj/iwd-alpine-ci-build success Build - Configure
prestwoj/iwd-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-ci-makecheck success Make Check
prestwoj/iwd-ci-clang success clang PASS
prestwoj/iwd-alpine-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-alpine-ci-makecheck success Make Check
prestwoj/iwd-ci-testrunner success test-runner PASS

Commit Message

James Prestwood Nov. 5, 2024, 1:10 p.m. UTC
Based on a users report/logs it appears some drivers (brcmfmac in
this case) may have issues which cause them to never respond with
a CMD_CONNECT event, leaving IWD hung waiting indefinitely for
this.

Since IWD cannot control driver behavior its safest to set a timer
in userspace as a last resort in case this happens.
---
 src/netdev.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/src/netdev.c b/src/netdev.c
index 2eebf457..8aeae633 100644
--- a/src/netdev.c
+++ b/src/netdev.c
@@ -70,6 +70,8 @@ 
 #define ENOTSUPP 524
 #endif
 
+#define NETDEV_CONNECT_TIMEOUT 5
+
 enum connection_type {
 	CONNECTION_TYPE_SOFTMAC,
 	CONNECTION_TYPE_FULLMAC,
@@ -140,6 +142,7 @@  struct netdev {
 	struct l_timeout *sa_query_timeout;
 	struct l_timeout *sa_query_delay;
 	struct l_timeout *group_handshake_timeout;
+	struct l_timeout *connect_timeout;
 	uint16_t sa_query_id;
 	int8_t rssi_levels[16];
 	uint8_t rssi_levels_num;
@@ -859,6 +862,11 @@  static void netdev_connect_free(struct netdev *netdev)
 		netdev->group_handshake_timeout = NULL;
 	}
 
+	if (netdev->connect_timeout) {
+		l_timeout_remove(netdev->connect_timeout);
+		netdev->connect_timeout = NULL;
+	}
+
 	netdev->associated = false;
 	netdev->operational = false;
 	netdev->connected = false;
@@ -2561,6 +2569,33 @@  static void netdev_cmd_connect_cb(struct l_genl_msg *msg, void *user_data)
 				MMPDU_STATUS_CODE_UNSPECIFIED);
 }
 
+static void netdev_connect_timeout(struct l_timeout *timeout, void *user_data)
+{
+	struct netdev *netdev = user_data;
+
+	l_timeout_remove(netdev->connect_timeout);
+	netdev->connect_timeout = NULL;
+
+	l_error("Kernel never sent CMD_CONNECT event! This is a bug, please "
+		"report upstream");
+
+	/*
+	 * Set this now in case the connect event does arrive in between the
+	 * deauth and actually cleaning up the connection members.
+	 */
+	netdev->connected = false;
+
+	/*
+	 * Since the lack of the connect event indicates a kernel/driver bug its
+	 * hard to say what state the kernel in. Its probably safest to deauth
+	 * here just in case the kernel is in some state between authentication
+	 * and association.
+	 */
+	netdev_deauth_and_fail_connection(netdev,
+					NETDEV_RESULT_ASSOCIATION_FAILED,
+					MMPDU_STATUS_CODE_UNSPECIFIED);
+}
+
 static bool netdev_retry_owe(struct netdev *netdev)
 {
 	struct l_genl_msg *connect_cmd;
@@ -2578,11 +2613,19 @@  static bool netdev_retry_owe(struct netdev *netdev)
 						netdev_cmd_connect_cb, netdev,
 						NULL);
 
-	if (netdev->connect_cmd_id > 0)
-		return true;
+	if (!netdev->connect_cmd_id) {
+		l_genl_msg_unref(connect_cmd);
+		return false;
+	}
 
-	l_genl_msg_unref(connect_cmd);
-	return false;
+	if (netdev->connect_timeout)
+		l_timeout_remove(netdev->connect_timeout);
+
+	netdev->connect_timeout = l_timeout_create(NETDEV_CONNECT_TIMEOUT,
+							netdev_connect_timeout,
+							netdev, NULL);
+
+	return true;
 }
 
 static void netdev_connect_event(struct l_genl_msg *msg, struct netdev *netdev)
@@ -2602,6 +2645,12 @@  static void netdev_connect_event(struct l_genl_msg *msg, struct netdev *netdev)
 
 	l_debug("");
 
+	/* Clear the timer as soon as we receive this event */
+	if (netdev->connect_timeout) {
+		l_timeout_remove(netdev->connect_timeout);
+		netdev->connect_timeout = NULL;
+	}
+
 	if (netdev->aborting)
 		return;
 
@@ -3491,6 +3540,11 @@  static int netdev_begin_connection(struct netdev *netdev)
 			goto failed;
 
 		netdev->connect_cmd = NULL;
+
+		netdev->connect_timeout = l_timeout_create(
+						NETDEV_CONNECT_TIMEOUT,
+						netdev_connect_timeout,
+						netdev, NULL);
 	}
 
 	/*