diff mbox series

[v4,1/2] netdev: fall back to RSSI polling if SET_CQM fails

Message ID 20240903180653.205405-1-prestwoj@gmail.com (mailing list archive)
State Accepted, archived
Headers show
Series [v4,1/2] netdev: fall back to RSSI polling if SET_CQM fails | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-alpine-ci-fetch success Fetch PR
prestwoj/iwd-ci-gitlint success GitLint
prestwoj/iwd-ci-fetch success Fetch PR
prestwoj/iwd-alpine-ci-setupell success Prep - Setup ELL
prestwoj/iwd-ci-setupell success Prep - Setup ELL
prestwoj/iwd-ci-makedistcheck success Make Distcheck
prestwoj/iwd-alpine-ci-makedistcheck success Make Distcheck
prestwoj/iwd-ci-build success Build - Configure
prestwoj/iwd-alpine-ci-build success Build - Configure
prestwoj/iwd-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-ci-makecheck success Make Check
prestwoj/iwd-ci-clang success clang PASS
prestwoj/iwd-alpine-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-alpine-ci-makecheck success Make Check
prestwoj/iwd-ci-incremental_build success Incremental Build with patches
prestwoj/iwd-alpine-ci-incremental_build success Incremental Build with patches
prestwoj/iwd-ci-testrunner success test-runner PASS

Commit Message

James Prestwood Sept. 3, 2024, 6:06 p.m. UTC
Some drivers fail to set a CQM threshold and report not supported.
It's unclear exactly why, but if this happens roaming is effectively
broken.

To work around this, enable RSSI polling if -ENOTSUP is returned.
The polling callback has been changed to emit the HIGH/LOW signal
threshold events instead of just the RSSI level index, just as if
a CQM event came from the kernel.
---
 src/netdev.c | 158 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 108 insertions(+), 50 deletions(-)

v4:
 * Track the command IDs for SET_CQM

Comments

Denis Kenzior Sept. 5, 2024, 3:26 a.m. UTC | #1
Hi James,

On 9/3/24 1:06 PM, James Prestwood wrote:
> Some drivers fail to set a CQM threshold and report not supported.
> Its unclear exactly why but if this happens roaming is effectively
> broken.
> 
> To work around this enable RSSI polling if -ENOTSUP is returned.
> The polling callback has been changed to emit the HIGH/LOW signal
> threshold events instead of just the RSSI level index, just as if
> a CQM event came from the kernel.
> ---
>   src/netdev.c | 158 +++++++++++++++++++++++++++++++++++----------------
>   1 file changed, 108 insertions(+), 50 deletions(-)
> 
> v4:
>   * Track the command IDs for SET_CQM
> 

Both applied, thanks.

Regards,
-Denis
diff mbox series

Patch

diff --git a/src/netdev.c b/src/netdev.c
index 73fbf0c1..14fe41b4 100644
--- a/src/netdev.c
+++ b/src/netdev.c
@@ -133,6 +133,7 @@  struct netdev {
 	uint32_t get_oci_cmd_id;
 	uint32_t get_link_cmd_id;
 	uint32_t power_save_cmd_id;
+	uint32_t set_cqm_cmd_id;
 	enum netdev_result result;
 	uint16_t last_code; /* reason or status, depending on result */
 	struct l_timeout *neighbor_report_timeout;
@@ -190,6 +191,7 @@  struct netdev {
 	bool retry_auth : 1;
 	bool in_reassoc : 1;
 	bool privacy : 1;
+	bool cqm_poll_fallback : 1;
 };
 
 struct netdev_preauth_state {
@@ -650,6 +652,47 @@  static void netdev_set_rssi_level_idx(struct netdev *netdev)
 	netdev->cur_rssi_level_idx = new_level;
 }
 
+static void netdev_cqm_event_rssi_value(struct netdev *netdev, int rssi_val)
+{
+	bool new_rssi_low;
+	uint8_t prev_rssi_level_idx = netdev->cur_rssi_level_idx;
+	int threshold = netdev->frequency > 4000 ?
+					netdev->low_signal_threshold_5ghz :
+					netdev->low_signal_threshold;
+
+	if (!netdev->connected)
+		return;
+
+	if (rssi_val > 127)
+		rssi_val = 127;
+	else if (rssi_val < -127)
+		rssi_val = -127;
+
+	netdev->cur_rssi = rssi_val;
+
+	if (!netdev->event_filter)
+		return;
+
+	new_rssi_low = rssi_val < threshold;
+	if (netdev->cur_rssi_low != new_rssi_low) {
+		int event = new_rssi_low ?
+			NETDEV_EVENT_RSSI_THRESHOLD_LOW :
+			NETDEV_EVENT_RSSI_THRESHOLD_HIGH;
+
+		netdev->cur_rssi_low = new_rssi_low;
+		netdev->event_filter(netdev, event, NULL, netdev->user_data);
+	}
+
+	if (!netdev->rssi_levels_num)
+		return;
+
+	netdev_set_rssi_level_idx(netdev);
+	if (netdev->cur_rssi_level_idx != prev_rssi_level_idx)
+		netdev->event_filter(netdev, NETDEV_EVENT_RSSI_LEVEL_NOTIFY,
+					&netdev->cur_rssi_level_idx,
+					netdev->user_data);
+}
+
 static void netdev_rssi_poll_cb(struct l_genl_msg *msg, void *user_data)
 {
 	struct netdev *netdev = user_data;
@@ -686,11 +729,16 @@  static void netdev_rssi_poll_cb(struct l_genl_msg *msg, void *user_data)
 	netdev->cur_rssi = info.cur_rssi;
 
 	/*
-	 * Note we don't have to handle LOW_SIGNAL_THRESHOLD here.  The
-	 * CQM single threshold RSSI monitoring should work even if the
-	 * kernel driver doesn't support multiple thresholds.  So the
-	 * polling only handles the client-supplied threshold list.
+	 * If the CMD_SET_CQM call failed RSSI polling was started. In this case
+	 * we should behave just like its a CQM event and check both the RSSI
+	 * level indexes and the HIGH/LOW thresholds.
 	 */
+	if (netdev->cqm_poll_fallback) {
+		netdev_cqm_event_rssi_value(netdev, info.cur_rssi);
+		goto done;
+	}
+
+	/* Otherwise just update the level notifications, CQM events work */
 	netdev_set_rssi_level_idx(netdev);
 	if (netdev->cur_rssi_level_idx != prev_rssi_level_idx)
 		netdev->event_filter(netdev, NETDEV_EVENT_RSSI_LEVEL_NOTIFY,
@@ -1031,6 +1079,11 @@  static void netdev_free(void *data)
 		netdev->get_station_cmd_id = 0;
 	}
 
+	if (netdev->set_cqm_cmd_id) {
+		l_genl_family_cancel(nl80211, netdev->set_cqm_cmd_id);
+		netdev->set_cqm_cmd_id = 0;
+	}
+
 	if (netdev->fw_roam_bss)
 		scan_bss_free(netdev->fw_roam_bss);
 
@@ -1039,6 +1092,9 @@  static void netdev_free(void *data)
 		netdev->get_link_cmd_id = 0;
 	}
 
+	if (netdev->rssi_poll_timeout)
+		l_timeout_remove(netdev->rssi_poll_timeout);
+
 	scan_wdev_remove(netdev->wdev_id);
 
 	watchlist_destroy(&netdev->station_watches);
@@ -1093,47 +1149,6 @@  static void netdev_cqm_event_rssi_threshold(struct netdev *netdev,
 	netdev->event_filter(netdev, event, NULL, netdev->user_data);
 }
 
-static void netdev_cqm_event_rssi_value(struct netdev *netdev, int rssi_val)
-{
-	bool new_rssi_low;
-	uint8_t prev_rssi_level_idx = netdev->cur_rssi_level_idx;
-	int threshold = netdev->frequency > 4000 ?
-					netdev->low_signal_threshold_5ghz :
-					netdev->low_signal_threshold;
-
-	if (!netdev->connected)
-		return;
-
-	if (rssi_val > 127)
-		rssi_val = 127;
-	else if (rssi_val < -127)
-		rssi_val = -127;
-
-	netdev->cur_rssi = rssi_val;
-
-	if (!netdev->event_filter)
-		return;
-
-	new_rssi_low = rssi_val < threshold;
-	if (netdev->cur_rssi_low != new_rssi_low) {
-		int event = new_rssi_low ?
-			NETDEV_EVENT_RSSI_THRESHOLD_LOW :
-			NETDEV_EVENT_RSSI_THRESHOLD_HIGH;
-
-		netdev->cur_rssi_low = new_rssi_low;
-		netdev->event_filter(netdev, event, NULL, netdev->user_data);
-	}
-
-	if (!netdev->rssi_levels_num)
-		return;
-
-	netdev_set_rssi_level_idx(netdev);
-	if (netdev->cur_rssi_level_idx != prev_rssi_level_idx)
-		netdev->event_filter(netdev, NETDEV_EVENT_RSSI_LEVEL_NOTIFY,
-					&netdev->cur_rssi_level_idx,
-					netdev->user_data);
-}
-
 static void netdev_cqm_event(struct l_genl_msg *msg, struct netdev *netdev)
 {
 	struct l_genl_attr attr;
@@ -3644,11 +3659,50 @@  static struct l_genl_msg *netdev_build_cmd_cqm_rssi_update(
 
 static void netdev_cmd_set_cqm_cb(struct l_genl_msg *msg, void *user_data)
 {
+	struct netdev *netdev = user_data;
 	int err = l_genl_msg_get_error(msg);
 	const char *ext_error;
 
-	if (err >= 0)
+	netdev->set_cqm_cmd_id = 0;
+
+	if (err >= 0) {
+		/*
+		 * Looking at some driver code it appears that the -ENOTSUP CQM
+		 * failure could be transient. Just in case, reset the fallback
+		 * flag if CQM happens to start working again.
+		 */
+		if (netdev->cqm_poll_fallback) {
+			l_debug("CMD_SET_CQM succeeded, stop polling fallback");
+
+			if (netdev->rssi_poll_timeout) {
+				l_timeout_remove(netdev->rssi_poll_timeout);
+				netdev->rssi_poll_timeout = NULL;
+			}
+
+			netdev->cqm_poll_fallback = false;
+		}
+
+		return;
+	}
+
+	/*
+	 * Some drivers enable beacon filtering but also use software CQM which
+	 * mac80211 detects and returns -ENOTSUP. There is no way to check this
+	 * ahead of time so if we see this start polling in order to get RSSI
+	 * updates.
+	 */
+	if (err == -ENOTSUP) {
+		l_debug("CMD_SET_CQM not supported, falling back to polling");
+		netdev->cqm_poll_fallback = true;
+
+		if (netdev->rssi_poll_timeout)
+			return;
+
+		netdev->rssi_poll_timeout = l_timeout_create(1,
+						netdev_rssi_poll, netdev, NULL);
+
 		return;
+	}
 
 	ext_error = l_genl_msg_get_extended_error(msg);
 	l_error("CMD_SET_CQM failed: %s",
@@ -3671,8 +3725,10 @@  static int netdev_cqm_rssi_update(struct netdev *netdev)
 	if (!msg)
 		return -EINVAL;
 
-	if (!l_genl_family_send(nl80211, msg, netdev_cmd_set_cqm_cb,
-				NULL, NULL)) {
+	netdev->set_cqm_cmd_id = l_genl_family_send(nl80211, msg,
+						netdev_cmd_set_cqm_cb,
+						netdev, NULL);
+	if (!netdev->set_cqm_cmd_id) {
 		l_genl_msg_unref(msg);
 		return -EIO;
 	}
@@ -5309,8 +5365,10 @@  int netdev_set_rssi_report_levels(struct netdev *netdev, const int8_t *levels,
 	if (!cmd_set_cqm)
 		return -EINVAL;
 
-	if (!l_genl_family_send(nl80211, cmd_set_cqm, netdev_cmd_set_cqm_cb,
-				NULL, NULL)) {
+	netdev->set_cqm_cmd_id = l_genl_family_send(nl80211, cmd_set_cqm,
+							netdev_cmd_set_cqm_cb,
+							netdev, NULL);
+	if (!netdev->set_cqm_cmd_id) {
 		l_genl_msg_unref(cmd_set_cqm);
 		return -EIO;
 	}