diff mbox series

[4/4] station: rate limit packet loss roam scans

Message ID 20231030134837.452957-5-prestwoj@gmail.com (mailing list archive)
State New
Headers show
Series Packet/beacon loss roaming improvements | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-ci-gitlint fail [4/4] station: rate limit packet loss roam scans 27: B1 Line exceeds max length (82>80): "Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)" 32: B1 Line exceeds max length (88>80): "Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam" 34: B1 Line exceeds max length (84>80): "Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a" 40: B1 Line exceeds max length (82>80): "Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)" 45: B1 Line exceeds max length (88>80): "Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam" 47: B1 Line exceeds max length (84>80): "Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a" 53: B1 Line exceeds max length (82>80): "Oct 27 16:23:48 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)" 58: B1 Line exceeds max length (88>80): "Oct 27 16:23:48 src/station.c:station_start_roam() Using cached neighbor report for roam" 60: B1 Line exceeds max length (84>80): "Oct 27 16:23:48 src/scan.c:scan_request_triggered() Active scan triggered for wdev a" 62: B1 Line exceeds max length (83>80): "Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Del Station(20)" 63: B1 Line exceeds max length (86>80): "Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Deauthenticate(39)" 65: B1 Line exceeds max length (82>80): "Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Disconnect(48)"

Commit Message

James Prestwood Oct. 30, 2023, 1:48 p.m. UTC
The packet loss handler puts a higher priority on roaming compared
to the low signal roam path. This is generally beneficial since this
event usually indicates some problem with the BSS and generally is
an indicator that a disconnect will follow sometime soon.

But by immediately issuing a scan we run the risk of causing many
successive scans if more packet loss events arrive following
the roam scans (and if no candidates are found). Logs provided
further.

To help with this handle the first event with priority and
immediately issue a roam scan. If another event comes in within a
certain timeframe (2 seconds) don't immediately scan, but instead
rearm the roam timer instead of issuing a scan. This also handles
the case of a low signal roam scan followed by a packet loss
event. Delaying the roam will at least provide some time for packets
to get out in between roam scans.

Logs were snipped to be less verbose, but this cycled happened
5 times prior. In total 7 scans were issued in 5 seconds which may
very well have been the reason for the local disconnect:

Oct 27 16:23:46 src/station.c:station_roam_failed() 9
Oct 27 16:23:46 src/wiphy.c:wiphy_radio_work_done() Work item 29 done
Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10
Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9
Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 30
Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 30
Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam
Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification New Scan Results(34)
Oct 27 16:23:47 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
... scan results ...
Oct 27 16:23:47 src/station.c:station_roam_failed() 9
Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_done() Work item 30 done
Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10
Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9
Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 31
Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 31
Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam
Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification New Scan Results(34)
Oct 27 16:23:48 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
... scan results ...
Oct 27 16:23:48 src/station.c:station_roam_failed() 9
Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_done() Work item 31 done
Oct 27 16:23:48 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
Oct 27 16:23:48 src/station.c:station_packets_lost() Packets lost event: 10
Oct 27 16:23:48 src/station.c:station_roam_scan() ifindex: 9
Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 32
Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_next() Starting work item 32
Oct 27 16:23:48 src/station.c:station_start_roam() Using cached neighbor report for roam
Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
Oct 27 16:23:48 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
Oct 27 16:23:49 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Del Station(20)
Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Deauthenticate(39)
Oct 27 16:23:49 src/netdev.c:netdev_deauthenticate_event()
Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Disconnect(48)
Oct 27 16:23:49 src/netdev.c:netdev_disconnect_event()
Oct 27 16:23:49 Received Deauthentication event, reason: 4, from_ap: false
---
 src/station.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

Comments

Denis Kenzior Oct. 30, 2023, 2:48 p.m. UTC | #1
Hi James,

On 10/30/23 08:48, James Prestwood wrote:
> The packet loss handler puts a higher priority on roaming compared
> to the low signal roam path. This is generally beneficial since this
> event usually indicates some problem with the BSS and generally is
> an indicator that a disconnect will follow sometime soon.
> 
> But by immediately issuing a scan we run the risk of causing many
> successive scans if more packet loss events arrive following
> the roam scans (and if no candidates are found). Logs provided
> further.
> 
> To help with this handle the first event with priority and
> immediately issue a roam scan. If another event comes in within a
> certain timeframe (2 seconds) don't immediately scan, but instead
> rearm the roam timer instead of issuing a scan. This also handles
> the case of a low signal roam scan followed by a packet loss
> event. Delaying the roam will at least provide some time for packets
> to get out in between roam scans.
> 
> Logs were snipped to be less verbose, but this cycled happened
> 5 times prior. In total 7 scans were issued in 5 seconds which may
> very well have been the reason for the local disconnect:
> 
> Oct 27 16:23:46 src/station.c:station_roam_failed() 9
> Oct 27 16:23:46 src/wiphy.c:wiphy_radio_work_done() Work item 29 done
> Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
> Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10
> Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9
> Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 30
> Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 30
> Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam
> Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
> Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
> Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification New Scan Results(34)
> Oct 27 16:23:47 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
> ... scan results ...
> Oct 27 16:23:47 src/station.c:station_roam_failed() 9
> Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_done() Work item 30 done
> Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
> Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10
> Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9
> Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 31
> Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 31
> Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam
> Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
> Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
> Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification New Scan Results(34)
> Oct 27 16:23:48 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
> ... scan results ...
> Oct 27 16:23:48 src/station.c:station_roam_failed() 9
> Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_done() Work item 31 done
> Oct 27 16:23:48 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64)
> Oct 27 16:23:48 src/station.c:station_packets_lost() Packets lost event: 10
> Oct 27 16:23:48 src/station.c:station_roam_scan() ifindex: 9
> Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 32
> Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_next() Starting work item 32
> Oct 27 16:23:48 src/station.c:station_start_roam() Using cached neighbor report for roam
> Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification Trigger Scan(33)
> Oct 27 16:23:48 src/scan.c:scan_request_triggered() Active scan triggered for wdev a
> Oct 27 16:23:49 src/netdev.c:netdev_link_notify() event 16 on ifindex 9
> Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Del Station(20)
> Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Deauthenticate(39)
> Oct 27 16:23:49 src/netdev.c:netdev_deauthenticate_event()
> Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Disconnect(48)
> Oct 27 16:23:49 src/netdev.c:netdev_disconnect_event()
> Oct 27 16:23:49 Received Deauthentication event, reason: 4, from_ap: false
> ---
>   src/station.c | 28 +++++++++++++++++++++++++++-
>   1 file changed, 27 insertions(+), 1 deletion(-)
> 

Applied, thanks.

Regards,
-Denis
diff mbox series

Patch

diff --git a/src/station.c b/src/station.c
index 8c87b3e0..bab690b9 100644
--- a/src/station.c
+++ b/src/station.c
@@ -123,6 +123,8 @@  struct station {
 
 	struct wiphy_radio_work_item ft_work;
 
+	uint64_t last_roam_scan;
+
 	bool preparing_roam : 1;
 	bool roam_scan_full : 1;
 	bool signal_low : 1;
@@ -1708,6 +1710,7 @@  static void station_roam_state_clear(struct station *station)
 	station->signal_low = false;
 	station->roam_min_time.tv_sec = 0;
 	station->netconfig_after_roam = false;
+	station->last_roam_scan = 0;
 
 	if (station->roam_scan_id)
 		scan_cancel(netdev_get_wdev_id(station->netdev),
@@ -2711,6 +2714,8 @@  static int station_roam_scan(struct station *station,
 	if (L_WARN_ON(scan_freq_set_isempty(params.freqs)))
 		return -ENOTSUP;
 
+	station->last_roam_scan = l_time_now();
+
 	station->roam_scan_id =
 		scan_active_full(netdev_get_wdev_id(station->netdev), &params,
 					station_roam_scan_triggered,
@@ -3330,10 +3335,13 @@  static void station_disconnect_event(struct station *station, void *event_data)
 	l_warn("Unexpected disconnect event");
 }
 
-#define STATION_PKT_LOSS_THRESHOLD 10
+#define STATION_PKT_LOSS_THRESHOLD	10
+#define LOSS_ROAM_RATE_LIMIT		2
 
 static void station_packets_lost(struct station *station, uint32_t num_pkts)
 {
+	uint64_t elapsed;
+
 	l_debug("Packets lost event: %u", num_pkts);
 
 	if (num_pkts < STATION_PKT_LOSS_THRESHOLD)
@@ -3344,6 +3352,24 @@  static void station_packets_lost(struct station *station, uint32_t num_pkts)
 
 	station_debug_event(station, "packet-loss-roam");
 
+	elapsed = l_time_diff(station->last_roam_scan, l_time_now());
+
+	/*
+	 * If we just issued a roam scan, delay the roam to avoid constant
+	 * scanning.
+	 */
+	if (LOSS_ROAM_RATE_LIMIT > l_time_to_secs(elapsed)) {
+		l_debug("Too many roam attempts in %u second timeframe, "
+			"delaying roam", LOSS_ROAM_RATE_LIMIT);
+
+		if (station->roam_trigger_timeout)
+			return;
+
+		station_roam_timeout_rearm(station, LOSS_ROAM_RATE_LIMIT);
+
+		return;
+	}
+
 	station_start_roam(station);
 }