diff mbox series

add AllowRoaming station property

Message ID 20231018122534.33455-1-peda@bang-olufsen.dk (mailing list archive)
State New
Headers show
Series add AllowRoaming station property | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
prestwoj/iwd-alpine-ci-fetch success Fetch PR
prestwoj/iwd-ci-gitlint success GitLint
prestwoj/iwd-ci-fetch success Fetch PR
prestwoj/iwd-ci-makedistcheck success Make Distcheck
prestwoj/iwd-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-alpine-ci-makedistcheck fail Make Distcheck Make FAIL: ../../build-aux/test-driver: line 112: 20879 Segmentation fault (core dumped) "$@" >> "$log_file" 2>&1 make[4]: *** [Makefile:2924: test-suite.log] Error 1 make[3]: *** [Makefile:3032: check-TESTS] Error 2 make[2]: *** [Makefile:3389: check-am] Error 2 make[1]: *** [Makefile:3391: check] Error 2 make: *** [Makefile:3310: distcheck] Error 1
prestwoj/iwd-alpine-ci-incremental_build success Incremental build not run PASS
prestwoj/iwd-ci-build success Build - Configure
prestwoj/iwd-alpine-ci-build success Build - Configure
prestwoj/iwd-ci-clang success clang PASS
prestwoj/iwd-ci-makecheckvalgrind success Make Check w/Valgrind
prestwoj/iwd-ci-makecheck success Make Check
prestwoj/iwd-alpine-ci-makecheckvalgrind fail Make Check FAIL: ./build-aux/test-driver: line 112: 38243 Segmentation fault (core dumped) "$@" >> "$log_file" 2>&1 make[3]: *** [Makefile:2924: test-suite.log] Error 1 make[2]: *** [Makefile:3032: check-TESTS] Error 2 make[1]: *** [Makefile:3389: check-am] Error 2 make: *** [Makefile:3391: check] Error 2
prestwoj/iwd-alpine-ci-makecheck fail Make Check FAIL: ./build-aux/test-driver: line 112: 38483 Segmentation fault (core dumped) "$@" >> "$log_file" 2>&1 make[3]: *** [Makefile:2924: test-suite.log] Error 1 make[2]: *** [Makefile:3032: check-TESTS] Error 2 make[1]: *** [Makefile:3389: check-am] Error 2 make: *** [Makefile:3391: check] Error 2
prestwoj/iwd-ci-testrunner success test-runner PASS

Commit Message

Pedro André Oct. 18, 2023, 12:27 p.m. UTC
From: Pedro Andre <peda@bang-olufsen.dk>

This adds an AllowRoaming property to the station struct that makes it
possible to indicate to the station/interface that it should not go
into roaming state (when set to false). This property defaults to true
(i.e. it defaults to the normal iwd behaviour that allows roaming).

---
 src/station.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

Comments

James Prestwood Oct. 18, 2023, 12:51 p.m. UTC | #1
Hi Pedro,

On 10/18/23 5:27 AM, Pedro André wrote:
> From: Pedro Andre <peda@bang-olufsen.dk>
> 
> This adds an AllowRoaming property to the station struct that makes it
> possible to indicate to the station/interface that it should not go
> into roaming state (when set to false). This property defaults to true
> (i.e. it defaults to the normal iwd behaviour that allows roaming).
> 
> ---
>   src/station.c | 40 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 40 insertions(+)
> 
> diff --git a/src/station.c b/src/station.c
> index 065687d..00254b7 100644
> --- a/src/station.c
> +++ b/src/station.c
> @@ -130,6 +130,7 @@ struct station {
>   	bool autoconnect : 1;
>   	bool autoconnect_can_start : 1;
>   	bool netconfig_after_roam : 1;
> +	bool allow_roaming : 1;
>   };
>   
>   struct anqp_entry {
> @@ -2754,6 +2755,9 @@ static bool station_cannot_roam(struct station *station)
>   	const struct l_settings *config = iwd_get_config();
>   	bool disabled;
>   
> +	if (!station->allow_roaming)
> +		return true;
> +
>   	/*
>   	 * Disable roaming with hardware that can roam automatically. Note this
>   	 * is now required for recent kernels which have CQM event support on
> @@ -4155,6 +4159,38 @@ static bool station_property_get_state(struct l_dbus *dbus,
>   	return true;
>   }
>   
> +static bool station_property_get_allow_roaming(struct l_dbus *dbus,
> +					struct l_dbus_message *message,
> +					struct l_dbus_message_builder *builder,
> +					void *user_data)
> +{
> +	struct station *station = user_data;
> +	bool roaming = station->allow_roaming;
> +
> +	l_dbus_message_builder_append_basic(builder, 'b', &roaming);
> +	return true;
> +}
> +
> +static struct l_dbus_message *station_property_set_allow_roaming(
> +					struct l_dbus *dbus,
> +					struct l_dbus_message *message,
> +					struct l_dbus_message_iter *new_value,
> +					l_dbus_property_complete_cb_t complete,
> +					void *user_data)
> +{
> +	struct station *station = user_data;
> +	bool roaming;
> +
> +	if (!l_dbus_message_iter_get_variant(new_value, "b", &roaming))
> +		return dbus_error_invalid_args(message);
> +
> +	l_debug("Setting allow_roaming %s", roaming ? "true" : "false");
> +
> +	station->allow_roaming = roaming;
> +
> +	return l_dbus_message_new_method_return(message);
> +}
> +
>   void station_foreach(station_foreach_func_t func, void *user_data)
>   {
>   	const struct l_queue_entry *entry;
> @@ -4358,6 +4394,8 @@ static struct station *station_create(struct netdev *netdev)
>   
>   	station->roam_bss_list = l_queue_new();
>   
> +	station->allow_roaming = true;
> +
>   	return station;
>   }
>   
> @@ -4484,6 +4522,8 @@ static void station_setup_interface(struct l_dbus_interface *interface)
>   					station_property_get_scanning, NULL);
>   	l_dbus_interface_property(interface, "State", 0, "s",
>   					station_property_get_state, NULL);
> +	l_dbus_interface_property(interface, "AllowRoaming", 0, "b",
> +				station_property_get_allow_roaming, station_property_set_allow_roaming);
>   }
>   
>   static void station_destroy_interface(void *user_data)

LGTM, I actually needed something along these lines for dynamically 
disabling roaming e.g. during a software upgrade or some time when 
networking is critical. Just hadn't gotten to it yet.

Thanks,
James
Denis Kenzior Oct. 18, 2023, 2:20 p.m. UTC | #2
Hi Pedro,

On 10/18/23 07:27, Pedro André wrote:
> From: Pedro Andre <peda@bang-olufsen.dk>
> 
> This adds an AllowRoaming property to the station struct that makes it
> possible to indicate to the station/interface that it should not go
> into roaming state (when set to false). This property defaults to true
> (i.e. it defaults to the normal iwd behaviour that allows roaming).
> 

Is it possible for you to share the actual use case for this?  It would help to 
understand what problem this is trying to solve.  One problem I see here is that 
we might still want to honor AP directed roaming.

> ---
>   src/station.c | 40 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 40 insertions(+)
> 

<snip>

> +static struct l_dbus_message *station_property_set_allow_roaming(
> +					struct l_dbus *dbus,
> +					struct l_dbus_message *message,
> +					struct l_dbus_message_iter *new_value,
> +					l_dbus_property_complete_cb_t complete,
> +					void *user_data)
> +{
> +	struct station *station = user_data;
> +	bool roaming;
> +
> +	if (!l_dbus_message_iter_get_variant(new_value, "b", &roaming))
> +		return dbus_error_invalid_args(message);
> +
> +	l_debug("Setting allow_roaming %s", roaming ? "true" : "false");
> +
> +	station->allow_roaming = roaming;

So what happens if we're in the middle of a roam?

Also, what happens if this is reset to true and our rssi is already below the 
roam threshold?

> +
> +	return l_dbus_message_new_method_return(message);
> +}
> +
>   void station_foreach(station_foreach_func_t func, void *user_data)
>   {
>   	const struct l_queue_entry *entry;

<snip>

> @@ -4484,6 +4522,8 @@ static void station_setup_interface(struct l_dbus_interface *interface)
>   					station_property_get_scanning, NULL);
>   	l_dbus_interface_property(interface, "State", 0, "s",
>   					station_property_get_state, NULL);
> +	l_dbus_interface_property(interface, "AllowRoaming", 0, "b",
> +				station_property_get_allow_roaming, station_property_set_allow_roaming);

doc/coding-style.txt, Section 'Background', paragraph 2.

Also, there's missing documentation for this property.  And it seems to overlap 
rather significantly with "DisableRoamingScan" setting.  What's the plan there? 
And since we're adding a new API, can we at least add an auto test?

Regards,
-Denis
Denis Kenzior Oct. 18, 2023, 2:32 p.m. UTC | #3
Hi James,

> 
> LGTM, I actually needed something along these lines for dynamically disabling 
> roaming e.g. during a software upgrade or some time when networking is critical. 
> Just hadn't gotten to it yet.

I'm curious about the use cases.  What were your thoughts on how this should 
look like?

I imagine for things like firmware update you still have a set time limit in 
which this upgrade should happen?

Right now I like Pedro's earlier proposal with the minimum throughput guidance. 
I think we can go further and calculate the roaming RSSI threshold based on the 
minimum throughput.  This would allow iwd to ignore roaming until that minimum 
throughput can no longer be delivered.

If we need a bigger hammer, then we may want to consider using an implementation 
similar to how Modem.Lockdown was implemented in oFono.

Regards,
-Denis
James Prestwood Oct. 18, 2023, 2:56 p.m. UTC | #4
Hi Denis,

On 10/18/23 7:32 AM, Denis Kenzior wrote:
> Hi James,
> 
>>
>> LGTM, I actually needed something along these lines for dynamically 
>> disabling roaming e.g. during a software upgrade or some time when 
>> networking is critical. Just hadn't gotten to it yet.
> 
> I'm curious about the use cases.  What were your thoughts on how this 
> should look like?
> 
> I imagine for things like firmware update you still have a set time 
> limit in which this upgrade should happen?

I wanted to remove the possibility that a roam could cause a network 
timeout during an update. In a perfect world network queries should be 
fault tolerant and retry, but in practice this isn't always the case. 
And to be honest this is probably less of an issue than network 
congestion/latency on the backend causing a problem.

I think any issues that could happen would be entirely related to the 
infrastructure but nevertheless if avoiding a roam temporarily makes for 
better reliability I'd vote to do it.

For example one problem we see infrequently, but it does happen, is 
complete loss of IP networking post roam. Disabling roams during an 
update would at least get the update finished before this potentially 
happens.

> 
> Right now I like Pedro's earlier proposal with the minimum throughput 
> guidance. I think we can go further and calculate the roaming RSSI 
> threshold based on the minimum throughput.  This would allow iwd to 
> ignore roaming until that minimum throughput can no longer be delivered. >
> If we need a bigger hammer, then we may want to consider using an 
> implementation similar to how Modem.Lockdown was implemented in oFono.

I'll check that out.

> 
> Regards,
> -Denis
Pedro André Oct. 19, 2023, 11:55 a.m. UTC | #5
Hi Denis,

First of all, thank you for your feedback.

On 18/10/2023 16:20, Denis Kenzior wrote:
> Hi Pedro,
>
> On 10/18/23 07:27, Pedro André wrote:
>> From: Pedro Andre <peda@bang-olufsen.dk>
>>
>> This adds an AllowRoaming property to the station struct that makes it
>> possible to indicate to the station/interface that it should not go
>> into roaming state (when set to false). This property defaults to true
>> (i.e. it defaults to the normal iwd behaviour that allows roaming).
>>
>
> Is it possible for you to share the actual use case for this?  It 
> would help to understand what problem this is trying to solve. One 
> problem I see here is that we might still want to honor AP directed 
> roaming.
>

There is a use case where a device can be connected to an infrastructure 
AP and hosting its own AP. In this specific case it would be ideal to 
disable roaming since swapping the infrastructure AP can lead to 
instability on devices connected to the AP being hosted. In this case, 
the device should act as if it is not able to roam, so I was under the 
impression it would be OK to not honor AP directed roaming.

This setting would also need to be dynamic (i.e. set during runtime), 
since roaming might be needed during the same session if the use case 
changes (i.e. we no longer need to host an AP).


>> ---
>>   src/station.c | 40 ++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 40 insertions(+)
>>
>
> <snip>
>
>> +static struct l_dbus_message *station_property_set_allow_roaming(
>> +                    struct l_dbus *dbus,
>> +                    struct l_dbus_message *message,
>> +                    struct l_dbus_message_iter *new_value,
>> +                    l_dbus_property_complete_cb_t complete,
>> +                    void *user_data)
>> +{
>> +    struct station *station = user_data;
>> +    bool roaming;
>> +
>> +    if (!l_dbus_message_iter_get_variant(new_value, "b", &roaming))
>> +        return dbus_error_invalid_args(message);
>> +
>> +    l_debug("Setting allow_roaming %s", roaming ? "true" : "false");
>> +
>> +    station->allow_roaming = roaming;
>
> So what happens if we're in the middle of a roam?
>
> Also, what happens if this is reset to true and our rssi is already 
> below the roam threshold?


If we are in the middle of a roam, I would say we can let it finish and 
only after do we start honoring the AllowRoaming setting.

Furthermore, you raise a very valid point in regards to checking if we 
should roam once the property goes back to default. I can add this logic 
to the next iteration of the patch if you think it is worth it.


>
>
>> +
>> +    return l_dbus_message_new_method_return(message);
>> +}
>> +
>>   void station_foreach(station_foreach_func_t func, void *user_data)
>>   {
>>       const struct l_queue_entry *entry;
>
> <snip>
>
>> @@ -4484,6 +4522,8 @@ static void station_setup_interface(struct 
>> l_dbus_interface *interface)
>>                       station_property_get_scanning, NULL);
>>       l_dbus_interface_property(interface, "State", 0, "s",
>>                       station_property_get_state, NULL);
>> +    l_dbus_interface_property(interface, "AllowRoaming", 0, "b",
>> +                station_property_get_allow_roaming, 
>> station_property_set_allow_roaming);
>
> doc/coding-style.txt, Section 'Background', paragraph 2.
>
> Also, there's missing documentation for this property.  And it seems 
> to overlap rather significantly with "DisableRoamingScan" setting.  
> What's the plan there? And since we're adding a new API, can we at 
> least add an auto test? 
>
> Regards,
> -Denis


It does indeed overlap entirely with the "DisableRoamingScan" setting. 
The only difference is that "AllowRoaming" can be set during runtime 
trough D-Bus. Having a dynamic property is ideal for our use case as 
described above. If you think it is preferable, we can also merge the two.

Also, apologies for the bad style/format. Will improve on the next 
iteration of the patch together with documentation for the property and 
an auto test.


Regards,
-Pedro
Denis Kenzior Oct. 20, 2023, 2:57 p.m. UTC | #6
Hi Pedro,

On 10/19/23 06:55, Pedro André wrote:
> Hi Denis,
> 
> First of all, thank you for your feedback.
> 

No worries, I'm a bit swamped these days so my responses are sometimes delayed. 
Just FYI.

> 
> There is a use case where a device can be connected to an infrastructure
> AP and hosting its own AP. In this specific case it would be ideal to
> disable roaming since swapping the infrastructure AP can lead to
> instability on devices connected to the AP being hosted. In this case,
> the device should act as if it is not able to roam, so I was under the
> impression it would be OK to not honor AP directed roaming.

So AP directed roaming is a bit of a unicorn.  I haven't really seen it used in 
practice.  However, when it is enabled, it is usually pretty aggressive.  If you 
don't honor the roam request you will likely get disconnected.  Don't know if 
this impacts your design.

> 
> This setting would also need to be dynamic (i.e. set during runtime),
> since roaming might be needed during the same session if the use case
> changes (i.e. we no longer need to host an AP).

Sure, that makes sense.  I do think we need the ability to track whether this 
feature is being actively used (i.e. by tracking the D-Bus client that set the 
property) and go back to the default behavior if the client crashes, etc. 
Here's an example of how this was handled in oFono:

https://git.kernel.org/pub/scm/network/ofono/ofono.git/tree/doc/modem-api.txt#n42
https://git.kernel.org/pub/scm/network/ofono/ofono.git/tree/src/modem.c#n1022

> 
> If we are in the middle of a roam, I would say we can let it finish and
> only after do we start honoring the AllowRoaming setting.

Ok, fair enough.

> 
> Furthermore, you raise a very valid point in regards to checking if we
> should roam once the property goes back to default. I can add this logic
> to the next iteration of the patch if you think it is worth it.
> 

Yes, I think that would be nice.

> It does indeed overlap entirely with the "DisableRoamingScan" setting.
> The only difference is that "AllowRoaming" can be set during runtime
> trough D-Bus. Having a dynamic property is ideal for our use case as
> described above. If you think it is preferable, we can also merge the two.

DisableRoamingScan is something that isn't used very widely.  We don't even have 
auto tests for it for example.  My vote would be to combine the two and put 
DisableRoamingScan on a path to depreciation and eventual removal.

Regards,
-Denis
James Prestwood Oct. 20, 2023, 4:29 p.m. UTC | #7
Hi,

On 10/20/23 7:57 AM, Denis Kenzior wrote:
> Hi Pedro,
> 
> On 10/19/23 06:55, Pedro André wrote:
>> Hi Denis,
>>
>> First of all, thank you for your feedback.
>>
> 
> No worries, I'm a bit swamped these days so my responses are sometimes 
> delayed. Just FYI.
> 
>>
>> There is a use case where a device can be connected to an infrastructure
>> AP and hosting its own AP. In this specific case it would be ideal to
>> disable roaming since swapping the infrastructure AP can lead to
>> instability on devices connected to the AP being hosted. In this case,
>> the device should act as if it is not able to roam, so I was under the
>> impression it would be OK to not honor AP directed roaming.
> 
> So AP directed roaming is a bit of a unicorn.  I haven't really seen it 
> used in practice.  However, when it is enabled, it is usually pretty 
> aggressive.  If you don't honor the roam request you will likely get 
> disconnected.  Don't know if this impacts your design.

Just my two cents. I do see it used in our deployments, but the APs 
never seem to set the "disassociation imminent" bits so IWD will still 
decide if it should roam or not.

Btw hostapd has 3 separate CLI commands for transition management: 
DISASSOC_IMMINENT, ESS_DISASSOC, and BSS_TM_REQ.

The BSS_TM_REQ won't set the imminent bits unless additional options are 
passed so it wouldn't surprise me if nearly all APs leave those bits 
unset :)

I think its very unclear what the intent is of the AP sending the 
transition request. It likely sends identical frames regardless of the 
circumstances whether its "I am a bit overloaded, please leave" or "I'm 
shutting down leave immediately".

Thanks,
James
diff mbox series

Patch

diff --git a/src/station.c b/src/station.c
index 065687d..00254b7 100644
--- a/src/station.c
+++ b/src/station.c
@@ -130,6 +130,7 @@  struct station {
 	bool autoconnect : 1;
 	bool autoconnect_can_start : 1;
 	bool netconfig_after_roam : 1;
+	bool allow_roaming : 1;
 };
 
 struct anqp_entry {
@@ -2754,6 +2755,9 @@  static bool station_cannot_roam(struct station *station)
 	const struct l_settings *config = iwd_get_config();
 	bool disabled;
 
+	if (!station->allow_roaming)
+		return true;
+
 	/*
 	 * Disable roaming with hardware that can roam automatically. Note this
 	 * is now required for recent kernels which have CQM event support on
@@ -4155,6 +4159,38 @@  static bool station_property_get_state(struct l_dbus *dbus,
 	return true;
 }
 
+static bool station_property_get_allow_roaming(struct l_dbus *dbus,
+					struct l_dbus_message *message,
+					struct l_dbus_message_builder *builder,
+					void *user_data)
+{
+	struct station *station = user_data;
+	bool roaming = station->allow_roaming;
+
+	l_dbus_message_builder_append_basic(builder, 'b', &roaming);
+	return true;
+}
+
+static struct l_dbus_message *station_property_set_allow_roaming(
+					struct l_dbus *dbus,
+					struct l_dbus_message *message,
+					struct l_dbus_message_iter *new_value,
+					l_dbus_property_complete_cb_t complete,
+					void *user_data)
+{
+	struct station *station = user_data;
+	bool roaming;
+
+	if (!l_dbus_message_iter_get_variant(new_value, "b", &roaming))
+		return dbus_error_invalid_args(message);
+
+	l_debug("Setting allow_roaming %s", roaming ? "true" : "false");
+
+	station->allow_roaming = roaming;
+
+	return l_dbus_message_new_method_return(message);
+}
+
 void station_foreach(station_foreach_func_t func, void *user_data)
 {
 	const struct l_queue_entry *entry;
@@ -4358,6 +4394,8 @@  static struct station *station_create(struct netdev *netdev)
 
 	station->roam_bss_list = l_queue_new();
 
+	station->allow_roaming = true;
+
 	return station;
 }
 
@@ -4484,6 +4522,8 @@  static void station_setup_interface(struct l_dbus_interface *interface)
 					station_property_get_scanning, NULL);
 	l_dbus_interface_property(interface, "State", 0, "s",
 					station_property_get_state, NULL);
+	l_dbus_interface_property(interface, "AllowRoaming", 0, "b",
+				station_property_get_allow_roaming, station_property_set_allow_roaming);
 }
 
 static void station_destroy_interface(void *user_data)