diff mbox series

[RFC,1/1] mac80211: use AQL airtime for expected throughput.

Message ID 20220525103512.3666956-1-gasmibal@gmail.com (mailing list archive)
State RFC
Delegated to: Johannes Berg
Headers show
Series [RFC,1/1] mac80211: use AQL airtime for expected throughput. | expand

Commit Message

Baligh GASMI May 25, 2022, 10:35 a.m. UTC
Since the integration of AQL, packet TX airtime estimation is
calculated and counted to be used for the dequeue limit.

Use this estimated airtime to compute expected throughput for
each station.

It will be a generic mac80211 implementation. If the driver has a
get_expected_throughput implementation, it will be used instead.

Useful for L2 routing protocols, like B.A.T.M.A.N.

Signed-off-by: Baligh Gasmi <gasmibal@gmail.com>
---
 net/mac80211/driver-ops.h |  2 ++
 net/mac80211/sta_info.h   |  2 ++
 net/mac80211/status.c     | 22 ++++++++++++++++++++++
 net/mac80211/tx.c         |  3 ++-
 4 files changed, 28 insertions(+), 1 deletion(-)

Comments

Toke Høiland-Jørgensen May 25, 2022, 11:02 a.m. UTC | #1
Baligh Gasmi <gasmibal@gmail.com> writes:

> Since the integration of AQL, packet TX airtime estimation is
> calculated and counted to be used for the dequeue limit.
>
> Use this estimated airtime to compute expected throughput for
> each station.
>
> It will be a generic mac80211 implementation. If the driver has
> get_expected_throughput implementation, it will be used instead.
>
> Useful for L2 routing protocols, like B.A.T.M.A.N.
>
> Signed-off-by: Baligh Gasmi <gasmibal@gmail.com>
> ---
>  net/mac80211/driver-ops.h |  2 ++
>  net/mac80211/sta_info.h   |  2 ++
>  net/mac80211/status.c     | 22 ++++++++++++++++++++++
>  net/mac80211/tx.c         |  3 ++-
>  4 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
> index 4e2fc1a08681..4331b79647fa 100644
> --- a/net/mac80211/driver-ops.h
> +++ b/net/mac80211/driver-ops.h
> @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
>  	trace_drv_get_expected_throughput(&sta->sta);
>  	if (local->ops->get_expected_throughput && sta->uploaded)
>  		ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
> +	else
> +		ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp);
>  	trace_drv_return_u32(local, ret);
>  
>  	return ret;
> diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
> index 379fd367197f..fe60be4c671d 100644
> --- a/net/mac80211/sta_info.h
> +++ b/net/mac80211/sta_info.h
> @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags {
>  #define HT_AGG_STATE_STOP_CB		7
>  #define HT_AGG_STATE_SENT_ADDBA		8
>  
> +DECLARE_EWMA(avg_est_tp, 8, 16)
>  DECLARE_EWMA(avg_signal, 10, 8)
>  enum ieee80211_agg_stop_reason {
>  	AGG_STOP_DECLINED,
> @@ -641,6 +642,7 @@ struct sta_info {
>  		s8 last_ack_signal;
>  		bool ack_signal_filled;
>  		struct ewma_avg_signal avg_ack_signal;
> +		struct ewma_avg_est_tp avg_est_tp;
>  	} status_stats;
>  
>  	/* Updated from TX path only, no locking requirements */
> diff --git a/net/mac80211/status.c b/net/mac80211/status.c
> index e81e8a5bb774..647ade3719f5 100644
> --- a/net/mac80211/status.c
> +++ b/net/mac80211/status.c
> @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
>  			sta->status_stats.retry_failed++;
>  		sta->status_stats.retry_count += retry_count;
>  
> +		if (skb && tx_time_est) {

Shouldn't this be conditioned on actually being used (i.e., existence of
get_expected_throughput op)? Also maybe pull it out into its own function
to make it clear what it's doing...

> +			/* max average packet size */
> +			size_t pkt_size = skb->len > 1024 ? 1024 : skb->len;
> +
> +			if (acked) {
> +				/* ACK packet size */
> +				pkt_size += 14;
> +				/* SIFS x 2 */
> +				tx_time_est += 2 * 2;
> +			}
> +
> +			/* Backoff average x retries */
> +			tx_time_est += retry_count ? retry_count * 2 : 2;
> +
> +			/* failed tx */
> +			if (!acked && !noack_success)
> +				pkt_size = 0;
> +
> +			ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
> +					    ((pkt_size * 8) * 1000) / tx_time_est);

Could we avoid adding this division in the fast path?

> +		}
> +
>  		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
>  			if (sdata->vif.type == NL80211_IFTYPE_STATION &&
>  			    skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP))
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index b6b20f38de0e..d866a721690d 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -3793,7 +3793,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
>  	IEEE80211_SKB_CB(skb)->control.vif = vif;
>  
>  	if (vif &&
> -	    wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
> +	    (!local->ops->get_expected_throughput ||
> +	    wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) {

This implicitly enables AQL for every driver that doesn't set
get_expected_throughput, no? That is probably not a good idea...

-Toke
Baligh GASMI May 25, 2022, 12:14 p.m. UTC | #2
> > Since the integration of AQL, packet TX airtime estimation is
> > calculated and counted to be used for the dequeue limit.
> >
> > Use this estimated airtime to compute expected throughput for
> > each station.
> >
> > It will be a generic mac80211 implementation. If the driver has
> > get_expected_throughput implementation, it will be used instead.
> >
> > Useful for L2 routing protocols, like B.A.T.M.A.N.
> >
> > Signed-off-by: Baligh Gasmi <gasmibal@gmail.com>
> > ---
> >  net/mac80211/driver-ops.h |  2 ++
> >  net/mac80211/sta_info.h   |  2 ++
> >  net/mac80211/status.c     | 22 ++++++++++++++++++++++
> >  net/mac80211/tx.c         |  3 ++-
> >  4 files changed, 28 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
> > index 4e2fc1a08681..4331b79647fa 100644
> > --- a/net/mac80211/driver-ops.h
> > +++ b/net/mac80211/driver-ops.h
> > @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
> >       trace_drv_get_expected_throughput(&sta->sta);
> >       if (local->ops->get_expected_throughput && sta->uploaded)
> >               ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
> > +     else
> > +             ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp);
> >       trace_drv_return_u32(local, ret);
> >
> >       return ret;
> > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
> > index 379fd367197f..fe60be4c671d 100644
> > --- a/net/mac80211/sta_info.h
> > +++ b/net/mac80211/sta_info.h
> > @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags {
> >  #define HT_AGG_STATE_STOP_CB         7
> >  #define HT_AGG_STATE_SENT_ADDBA              8
> >
> > +DECLARE_EWMA(avg_est_tp, 8, 16)
> >  DECLARE_EWMA(avg_signal, 10, 8)
> >  enum ieee80211_agg_stop_reason {
> >       AGG_STOP_DECLINED,
> > @@ -641,6 +642,7 @@ struct sta_info {
> >               s8 last_ack_signal;
> >               bool ack_signal_filled;
> >               struct ewma_avg_signal avg_ack_signal;
> > +             struct ewma_avg_est_tp avg_est_tp;
> >       } status_stats;
> >
> >       /* Updated from TX path only, no locking requirements */
> > diff --git a/net/mac80211/status.c b/net/mac80211/status.c
> > index e81e8a5bb774..647ade3719f5 100644
> > --- a/net/mac80211/status.c
> > +++ b/net/mac80211/status.c
> > @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
> >                       sta->status_stats.retry_failed++;
> >               sta->status_stats.retry_count += retry_count;
> >
> > +             if (skb && tx_time_est) {
>
> Shouldn't this be conditioned on actually being used (i.e., existence of
> get_expected_throughput op? Also maybe pull it out into its own function
> to make it clear what it's doing...

I think that is already the case, since tx_time_est is non-zero only
when an estimated time has actually been set.
A dedicated function seems good to me, for clarity, yes.

>
> > +                     /* max average packet size */
> > +                     size_t pkt_size = skb->len > 1024 ? 1024 : skb->len;
> > +
> > +                     if (acked) {
> > +                             /* ACK packet size */
> > +                             pkt_size += 14;
> > +                             /* SIFS x 2 */
> > +                             tx_time_est += 2 * 2;
> > +                     }
> > +
> > +                     /* Backoff average x retries */
> > +                     tx_time_est += retry_count ? retry_count * 2 : 2;
> > +
> > +                     /* failed tx */
> > +                     if (!acked && !noack_success)
> > +                             pkt_size = 0;
> > +
> > +                     ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
> > +                                         ((pkt_size * 8) * 1000) / tx_time_est);
>
> Could we avoid adding this division in the fast path?

Maybe we can use the do_div() macro as an optimization; I don't see how
we can avoid the division entirely.


>
> > +             }
> > +
> >               if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
> >                       if (sdata->vif.type == NL80211_IFTYPE_STATION &&
> >                           skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP))
> > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> > index b6b20f38de0e..d866a721690d 100644
> > --- a/net/mac80211/tx.c
> > +++ b/net/mac80211/tx.c
> > @@ -3793,7 +3793,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
> >       IEEE80211_SKB_CB(skb)->control.vif = vif;
> >
> >       if (vif &&
> > -         wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
> > +         (!local->ops->get_expected_throughput ||
> > +         wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) {
>
> This implicitly enables AQL for every driver that doesn't set
> get_expected_throughput, no? That is probably not a good idea...

No, AQL will remain disabled if it is already disabled according to
NL80211_EXT_FEATURE_AQL; only the airtime estimation will be activated
with this diff. The function ieee80211_sta_update_pending_airtime()
already checks for NL80211_EXT_FEATURE_AQL.

>
> -Toke
>

Back to the basic idea: what do you think about it? Could it be a good
road to take?
Other factors can be added progressively to better estimate the throughput...
Felix Fietkau May 25, 2022, 12:50 p.m. UTC | #3
On 25.05.22 14:14, Baligh GASMI wrote:
>> > Since the integration of AQL, packet TX airtime estimation is
>> > calculated and counted to be used for the dequeue limit.
>> >
>> > Use this estimated airtime to compute expected throughput for
>> > each station.
>> >
>> > It will be a generic mac80211 implementation. If the driver has
>> > get_expected_throughput implementation, it will be used instead.
>> >
>> > Useful for L2 routing protocols, like B.A.T.M.A.N.
>> >
>> > Signed-off-by: Baligh Gasmi <gasmibal@gmail.com>
>> > ---
>> >  net/mac80211/driver-ops.h |  2 ++
>> >  net/mac80211/sta_info.h   |  2 ++
>> >  net/mac80211/status.c     | 22 ++++++++++++++++++++++
>> >  net/mac80211/tx.c         |  3 ++-
>> >  4 files changed, 28 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
>> > index 4e2fc1a08681..4331b79647fa 100644
>> > --- a/net/mac80211/driver-ops.h
>> > +++ b/net/mac80211/driver-ops.h
>> > @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
>> >       trace_drv_get_expected_throughput(&sta->sta);
>> >       if (local->ops->get_expected_throughput && sta->uploaded)
>> >               ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
>> > +     else
>> > +             ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp);
>> >       trace_drv_return_u32(local, ret);
>> >
>> >       return ret;
>> > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
>> > index 379fd367197f..fe60be4c671d 100644
>> > --- a/net/mac80211/sta_info.h
>> > +++ b/net/mac80211/sta_info.h
>> > @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags {
>> >  #define HT_AGG_STATE_STOP_CB         7
>> >  #define HT_AGG_STATE_SENT_ADDBA              8
>> >
>> > +DECLARE_EWMA(avg_est_tp, 8, 16)
>> >  DECLARE_EWMA(avg_signal, 10, 8)
>> >  enum ieee80211_agg_stop_reason {
>> >       AGG_STOP_DECLINED,
>> > @@ -641,6 +642,7 @@ struct sta_info {
>> >               s8 last_ack_signal;
>> >               bool ack_signal_filled;
>> >               struct ewma_avg_signal avg_ack_signal;
>> > +             struct ewma_avg_est_tp avg_est_tp;
>> >       } status_stats;
>> >
>> >       /* Updated from TX path only, no locking requirements */
>> > diff --git a/net/mac80211/status.c b/net/mac80211/status.c
>> > index e81e8a5bb774..647ade3719f5 100644
>> > --- a/net/mac80211/status.c
>> > +++ b/net/mac80211/status.c
>> > @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
>> >                       sta->status_stats.retry_failed++;
>> >               sta->status_stats.retry_count += retry_count;
>> >
>> > +             if (skb && tx_time_est) {
>>
>> Shouldn't this be conditioned on actually being used (i.e., existence of
>> get_expected_throughput op? Also maybe pull it out into its own function
>> to make it clear what it's doing...
> 
> It's already the case I think, since the tx_time_est is not-zero only
> when actually an estimated time is set.
> A dedicated function seems good for me, for clarity, yes.
> 
>>
>> > +                     /* max average packet size */
>> > +                     size_t pkt_size = skb->len > 1024 ? 1024 : skb->len;
>> > +
>> > +                     if (acked) {
>> > +                             /* ACK packet size */
>> > +                             pkt_size += 14;
>> > +                             /* SIFS x 2 */
>> > +                             tx_time_est += 2 * 2;
>> > +                     }
>> > +
>> > +                     /* Backoff average x retries */
>> > +                     tx_time_est += retry_count ? retry_count * 2 : 2;
>> > +
>> > +                     /* failed tx */
>> > +                     if (!acked && !noack_success)
>> > +                             pkt_size = 0;
>> > +
>> > +                     ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
>> > +                                         ((pkt_size * 8) * 1000) / tx_time_est);
>>
>> Could we avoid adding this division in the fast path?
> 
> Maybe we can use the do_div() macro for optimization, I don't see how
> we can avoid it.
Here's one idea on how to make it less expensive and improve the average 
of the throughput estimation at the same time:

In sta->status_stats add these fields:

u32 last_tp_update;
u32 pkt_size;
u32 tx_time_est;

And in this part of the code you do something like this:

u32 diff = ((u32)jiffies) - sta->status_stats.last_tp_update;
sta->status_stats.pkt_size += (pkt_size * 8) * 1000;
sta->status_stats.tx_time_est += tx_time_est;
if (diff > HZ / 10) {
	ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
			    sta->status_stats.pkt_size /
			    sta->status_stats.tx_time_est);
	sta->status_stats.pkt_size = 0;
	sta->status_stats.tx_time_est = 0;
}

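This ensures that you will only get max. 1 update per 100ms interval
(provided last_tp_update is also set to the current jiffies value inside
the if block, which the sketch above omits).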

Does that make sense?

- Felix
Baligh GASMI May 25, 2022, 1:37 p.m. UTC | #4
Indeed it's less expensive.

I'll try to make something in this direction to see what it looks like.

- Baligh
Felix Fietkau May 25, 2022, 5:54 p.m. UTC | #5
On 25.05.22 15:37, Baligh GASMI wrote:
> Indeed it's less expensive.
> 
> I'll try to make something in this direction to see what it looks like.
Thanks. Please also make sure that all of this extra work is not 
performed for drivers that don't need it, because they either implement 
.get_expected_throughput, or use minstrel, which also implements it in a 
better way.

- Felix
Felix Fietkau May 25, 2022, 5:56 p.m. UTC | #6
On 25.05.22 19:21, Dave Taht wrote:
> Sigh. Ideally we are trying to keep latency low by deeply understanding 
> current conditions. Batching up anything on this large interval feels 
> wrong. Powersave and beacons become "beating" frequencies.
Shorter intervals (or even doing it without batching at all) will likely 
lead to much more fluctuation, which could make the average less useful.
I think batching it up will likely be beneficial for protocols like 
batman-adv.

- Felix
diff mbox series

Patch

diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 4e2fc1a08681..4331b79647fa 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1142,6 +1142,8 @@  static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
 	trace_drv_get_expected_throughput(&sta->sta);
 	if (local->ops->get_expected_throughput && sta->uploaded)
 		ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
+	else
+		ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp);
 	trace_drv_return_u32(local, ret);
 
 	return ret;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 379fd367197f..fe60be4c671d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -123,6 +123,7 @@  enum ieee80211_sta_info_flags {
 #define HT_AGG_STATE_STOP_CB		7
 #define HT_AGG_STATE_SENT_ADDBA		8
 
+DECLARE_EWMA(avg_est_tp, 8, 16)
 DECLARE_EWMA(avg_signal, 10, 8)
 enum ieee80211_agg_stop_reason {
 	AGG_STOP_DECLINED,
@@ -641,6 +642,7 @@  struct sta_info {
 		s8 last_ack_signal;
 		bool ack_signal_filled;
 		struct ewma_avg_signal avg_ack_signal;
+		struct ewma_avg_est_tp avg_est_tp;
 	} status_stats;
 
 	/* Updated from TX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index e81e8a5bb774..647ade3719f5 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -1145,6 +1145,28 @@  void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
 			sta->status_stats.retry_failed++;
 		sta->status_stats.retry_count += retry_count;
 
+		if (skb && tx_time_est) {
+			/* max average packet size */
+			size_t pkt_size = skb->len > 1024 ? 1024 : skb->len;
+
+			if (acked) {
+				/* ACK packet size */
+				pkt_size += 14;
+				/* SIFS x 2 */
+				tx_time_est += 2 * 2;
+			}
+
+			/* Backoff average x retries */
+			tx_time_est += retry_count ? retry_count * 2 : 2;
+
+			/* failed tx */
+			if (!acked && !noack_success)
+				pkt_size = 0;
+
+			ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
+					    ((pkt_size * 8) * 1000) / tx_time_est);
+		}
+
 		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			if (sdata->vif.type == NL80211_IFTYPE_STATION &&
 			    skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b6b20f38de0e..d866a721690d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3793,7 +3793,8 @@  struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	IEEE80211_SKB_CB(skb)->control.vif = vif;
 
 	if (vif &&
-	    wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+	    (!local->ops->get_expected_throughput ||
+	    wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) {
 		bool ampdu = txq->ac != IEEE80211_AC_VO;
 		u32 airtime;