diff mbox series

netdev: try catching buggy driver behavior to avoid hangs

Message ID 20221107184944.675454-1-prestwoj@gmail.com (mailing list archive)
State New
Headers show
Series netdev: try catching buggy driver behavior to avoid hangs | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-alpine-ci-fetch success Fetch PR
prestwoj/iwd-ci-gitlint success GitLint
prestwoj/iwd-ci-fetch success Fetch PR
prestwoj/iwd-alpine-ci-makedistcheck success Make Distcheck
prestwoj/iwd-alpine-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-alpine-ci-build success Build - Configure
prestwoj/iwd-ci-makedistcheck success Make Distcheck
prestwoj/iwd-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-ci-build success Build - Configure
prestwoj/iwd-alpine-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-alpine-ci-makecheck success Make Check
prestwoj/iwd-ci-clang success clang PASS
prestwoj/iwd-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-ci-makecheck success Make Check
prestwoj/iwd-ci-testrunner success test-runner PASS

Commit Message

James Prestwood Nov. 7, 2022, 6:49 p.m. UTC
It was reported that IWD hangs during roaming due to the kernel never
sending any expected events after authentication. This was likely
due to buggy firmware but ultimately got IWD into an unrecoverable
state waiting for an event which never came.

The only indication of this to userspace was a DEL_STATION event
which arrives after the successful authenticate event. IWD now
watches for this event and fails the connection if it arrives for our
authenticator while an auth/assoc protocol is still running and the
station's connected time is non-zero.
---
 src/netdev.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/src/netdev.c b/src/netdev.c
index cda8d183..817b8737 100644
--- a/src/netdev.c
+++ b/src/netdev.c
@@ -4933,13 +4933,12 @@  static void netdev_station_event(struct l_genl_msg *msg,
 					struct netdev *netdev, bool added)
 {
 	struct l_genl_attr attr;
+	struct l_genl_attr nested;
 	uint16_t type;
 	uint16_t len;
 	const void *data;
 	const uint8_t *mac = NULL;
-
-	if (netdev_get_iftype(netdev) != NETDEV_IFTYPE_ADHOC)
-		return;
+	uint32_t connected_time = 0;
 
 	if (!l_genl_attr_init(&attr, msg))
 		return;
@@ -4948,6 +4947,22 @@  static void netdev_station_event(struct l_genl_msg *msg,
 		switch (type) {
 		case NL80211_ATTR_MAC:
 			mac = data;
+			break;
+		case NL80211_ATTR_STA_INFO:
+			if (!l_genl_attr_recurse(&attr, &nested))
+				continue;
+
+			while (l_genl_attr_next(&nested, &type, &len, &data)) {
+				if (type != NL80211_STA_INFO_CONNECTED_TIME)
+					continue;
+
+				if (len != 4)
+					continue;
+
+				connected_time = l_get_u32(data);
+				break;
+			}
+
 			break;
 		}
 	}
@@ -4958,6 +4973,49 @@  static void netdev_station_event(struct l_genl_msg *msg,
 		return;
 	}
 
+	/*
+	 * Check in case buggy drivers never send the events we expect. This
+	 * has been seen on iwlwifi where a 'connection loss' in the firmware
+	 * results in no events after authentication. The only indication of
+	 * failure we see is a DEL_STATION after the successful authenticate
+	 * event.
+	 *
+	 * Any protocol using CMD_AUTHENTICATE/CMD_ASSOCIATE could end up in
+	 * this situation:
+	 *  - SAE, during auth or assoc
+	 *  - FILS, during auth or assoc
+	 *  - FT, during associate (since CMD_AUTH is not used)
+	 *  - Reassociation, during assoc
+	 *
+	 * It should be noted that protocols such as SAE and OWE may reject
+	 * authentication with the intent of retrying (status 77) which does
+	 * result in a DEL_STATION event. In these cases the connected time is
+	 * zero, and the event can be ignored.
+	 *
+	 * If the following conditions are met we can assume this is a buggy
+	 * driver:
+	 *  - Current handshake exists
+	 *  - Handshake is not for an authenticator
+	 *  - Connected time is non-zero
+	 *  - The STA address matches our handshake authenticator address
+	 *  - Currently running auth/assoc protocol (auth-proto, ft, or reassoc)
+	 */
+	if (netdev->handshake && !netdev->handshake->authenticator &&
+			!added && connected_time &&
+			!memcmp(mac, netdev->handshake->aa, 6) &&
+			(netdev->ap || netdev->in_ft || netdev->in_reassoc)) {
+		l_warn("Kernel never sent a connect event to indicate failure! "
+			"This is a kernel bug and needs to be fixed");
+		netdev_connect_failed(netdev, netdev->associated ?
+					NETDEV_RESULT_ASSOCIATION_FAILED :
+					NETDEV_RESULT_AUTHENTICATION_FAILED,
+					MMPDU_STATUS_CODE_UNSPECIFIED);
+		return;
+	}
+
+	if (netdev_get_iftype(netdev) != NETDEV_IFTYPE_ADHOC)
+		return;
+
 	WATCHLIST_NOTIFY(&netdev->station_watches,
 			netdev_station_watch_func_t, netdev, mac, added);
 }