Message ID | 20220525103512.3666956-1-gasmibal@gmail.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Johannes Berg |
Headers | show |
Series | [RFC,1/1] mac80211: use AQL airtime for expected throughput. | expand |
Baligh Gasmi <gasmibal@gmail.com> writes: > Since the integration of AQL, packet TX airtime estimation is > calculated and counted to be used for the dequeue limit. > > Use this estimated airtime to compute expected throughput for > each station. > > It will be a generic mac80211 implementation. If the driver has > get_expected_throughput implementation, it will be used instead. > > Useful for L2 routing protocols, like B.A.T.M.A.N. > > Signed-off-by: Baligh Gasmi <gasmibal@gmail.com> > --- > net/mac80211/driver-ops.h | 2 ++ > net/mac80211/sta_info.h | 2 ++ > net/mac80211/status.c | 22 ++++++++++++++++++++++ > net/mac80211/tx.c | 3 ++- > 4 files changed, 28 insertions(+), 1 deletion(-) > > diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h > index 4e2fc1a08681..4331b79647fa 100644 > --- a/net/mac80211/driver-ops.h > +++ b/net/mac80211/driver-ops.h > @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, > trace_drv_get_expected_throughput(&sta->sta); > if (local->ops->get_expected_throughput && sta->uploaded) > ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); > + else > + ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp); > trace_drv_return_u32(local, ret); > > return ret; > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h > index 379fd367197f..fe60be4c671d 100644 > --- a/net/mac80211/sta_info.h > +++ b/net/mac80211/sta_info.h > @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags { > #define HT_AGG_STATE_STOP_CB 7 > #define HT_AGG_STATE_SENT_ADDBA 8 > > +DECLARE_EWMA(avg_est_tp, 8, 16) > DECLARE_EWMA(avg_signal, 10, 8) > enum ieee80211_agg_stop_reason { > AGG_STOP_DECLINED, > @@ -641,6 +642,7 @@ struct sta_info { > s8 last_ack_signal; > bool ack_signal_filled; > struct ewma_avg_signal avg_ack_signal; > + struct ewma_avg_est_tp avg_est_tp; > } status_stats; > > /* Updated from TX path only, no locking requirements */ > diff --git a/net/mac80211/status.c 
b/net/mac80211/status.c > index e81e8a5bb774..647ade3719f5 100644 > --- a/net/mac80211/status.c > +++ b/net/mac80211/status.c > @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, > sta->status_stats.retry_failed++; > sta->status_stats.retry_count += retry_count; > > + if (skb && tx_time_est) { Shouldn't this be conditioned on actually being used (i.e., existence of get_expected_throughput op? Also maybe pull it out into its own function to make it clear what it's doing... > + /* max average packet size */ > + size_t pkt_size = skb->len > 1024 ? 1024 : skb->len; > + > + if (acked) { > + /* ACK packet size */ > + pkt_size += 14; > + /* SIFS x 2 */ > + tx_time_est += 2 * 2; > + } > + > + /* Backoff average x retries */ > + tx_time_est += retry_count ? retry_count * 2 : 2; > + > + /* failed tx */ > + if (!acked && !noack_success) > + pkt_size = 0; > + > + ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp, > + ((pkt_size * 8) * 1000) / tx_time_est); Could we avoid adding this division in the fast path? > + } > + > if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { > if (sdata->vif.type == NL80211_IFTYPE_STATION && > skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > index b6b20f38de0e..d866a721690d 100644 > --- a/net/mac80211/tx.c > +++ b/net/mac80211/tx.c > @@ -3793,7 +3793,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, > IEEE80211_SKB_CB(skb)->control.vif = vif; > > if (vif && > - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { > + (!local->ops->get_expected_throughput || > + wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) { This implicitly enables AQL for every driver that doesn't set get_expected_throughput, no? That is probably not a good idea... -Toke
> > Since the integration of AQL, packet TX airtime estimation is > > calculated and counted to be used for the dequeue limit. > > > > Use this estimated airtime to compute expected throughput for > > each station. > > > > It will be a generic mac80211 implementation. If the driver has > > get_expected_throughput implementation, it will be used instead. > > > > Useful for L2 routing protocols, like B.A.T.M.A.N. > > > > Signed-off-by: Baligh Gasmi <gasmibal@gmail.com> > > --- > > net/mac80211/driver-ops.h | 2 ++ > > net/mac80211/sta_info.h | 2 ++ > > net/mac80211/status.c | 22 ++++++++++++++++++++++ > > net/mac80211/tx.c | 3 ++- > > 4 files changed, 28 insertions(+), 1 deletion(-) > > > > diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h > > index 4e2fc1a08681..4331b79647fa 100644 > > --- a/net/mac80211/driver-ops.h > > +++ b/net/mac80211/driver-ops.h > > @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, > > trace_drv_get_expected_throughput(&sta->sta); > > if (local->ops->get_expected_throughput && sta->uploaded) > > ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); > > + else > > + ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp); > > trace_drv_return_u32(local, ret); > > > > return ret; > > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h > > index 379fd367197f..fe60be4c671d 100644 > > --- a/net/mac80211/sta_info.h > > +++ b/net/mac80211/sta_info.h > > @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags { > > #define HT_AGG_STATE_STOP_CB 7 > > #define HT_AGG_STATE_SENT_ADDBA 8 > > > > +DECLARE_EWMA(avg_est_tp, 8, 16) > > DECLARE_EWMA(avg_signal, 10, 8) > > enum ieee80211_agg_stop_reason { > > AGG_STOP_DECLINED, > > @@ -641,6 +642,7 @@ struct sta_info { > > s8 last_ack_signal; > > bool ack_signal_filled; > > struct ewma_avg_signal avg_ack_signal; > > + struct ewma_avg_est_tp avg_est_tp; > > } status_stats; > > > > /* Updated from TX path only, no locking 
requirements */ > > diff --git a/net/mac80211/status.c b/net/mac80211/status.c > > index e81e8a5bb774..647ade3719f5 100644 > > --- a/net/mac80211/status.c > > +++ b/net/mac80211/status.c > > @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, > > sta->status_stats.retry_failed++; > > sta->status_stats.retry_count += retry_count; > > > > + if (skb && tx_time_est) { > > Shouldn't this be conditioned on actually being used (i.e., existence of > get_expected_throughput op? Also maybe pull it out into its own function > to make it clear what it's doing... It's already the case I think, since the tx_time_est is not-zero only when actually an estimated time is set. A dedicated function seems good for me, for clarity, yes. > > > + /* max average packet size */ > > + size_t pkt_size = skb->len > 1024 ? 1024 : skb->len; > > + > > + if (acked) { > > + /* ACK packet size */ > > + pkt_size += 14; > > + /* SIFS x 2 */ > > + tx_time_est += 2 * 2; > > + } > > + > > + /* Backoff average x retries */ > > + tx_time_est += retry_count ? retry_count * 2 : 2; > > + > > + /* failed tx */ > > + if (!acked && !noack_success) > > + pkt_size = 0; > > + > > + ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp, > > + ((pkt_size * 8) * 1000) / tx_time_est); > > Could we avoid adding this division in the fast path? Maybe we can use the do_div() macro for optimization, I don't see how we can avoid it. 
> > > + } > > + > > if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { > > if (sdata->vif.type == NL80211_IFTYPE_STATION && > > skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) > > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > > index b6b20f38de0e..d866a721690d 100644 > > --- a/net/mac80211/tx.c > > +++ b/net/mac80211/tx.c > > @@ -3793,7 +3793,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, > > IEEE80211_SKB_CB(skb)->control.vif = vif; > > > > if (vif && > > - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { > > + (!local->ops->get_expected_throughput || > > + wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) { > > This implicitly enables AQL for every driver that doesn't set > get_expected_throughput, no? That is probably not a good idea... No, AQL will be disabled if it's already the case, according to the NL80211_EXT_FEATURE_AQL, only airtime estimation will be activated with this diff. The function ieee80211_sta_update_pending_airtime() is already checking for NL80211_EXT_FEATURE_AQL. > > -Toke > Back to the base idea, what do you think about it, can it be a good road to take ? Other factors can be added progressively to better estimate the throughput...
On 25.05.22 14:14, Baligh GASMI wrote: >> > Since the integration of AQL, packet TX airtime estimation is >> > calculated and counted to be used for the dequeue limit. >> > >> > Use this estimated airtime to compute expected throughput for >> > each station. >> > >> > It will be a generic mac80211 implementation. If the driver has >> > get_expected_throughput implementation, it will be used instead. >> > >> > Useful for L2 routing protocols, like B.A.T.M.A.N. >> > >> > Signed-off-by: Baligh Gasmi <gasmibal@gmail.com> >> > --- >> > net/mac80211/driver-ops.h | 2 ++ >> > net/mac80211/sta_info.h | 2 ++ >> > net/mac80211/status.c | 22 ++++++++++++++++++++++ >> > net/mac80211/tx.c | 3 ++- >> > 4 files changed, 28 insertions(+), 1 deletion(-) >> > >> > diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h >> > index 4e2fc1a08681..4331b79647fa 100644 >> > --- a/net/mac80211/driver-ops.h >> > +++ b/net/mac80211/driver-ops.h >> > @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, >> > trace_drv_get_expected_throughput(&sta->sta); >> > if (local->ops->get_expected_throughput && sta->uploaded) >> > ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); >> > + else >> > + ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp); >> > trace_drv_return_u32(local, ret); >> > >> > return ret; >> > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h >> > index 379fd367197f..fe60be4c671d 100644 >> > --- a/net/mac80211/sta_info.h >> > +++ b/net/mac80211/sta_info.h >> > @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags { >> > #define HT_AGG_STATE_STOP_CB 7 >> > #define HT_AGG_STATE_SENT_ADDBA 8 >> > >> > +DECLARE_EWMA(avg_est_tp, 8, 16) >> > DECLARE_EWMA(avg_signal, 10, 8) >> > enum ieee80211_agg_stop_reason { >> > AGG_STOP_DECLINED, >> > @@ -641,6 +642,7 @@ struct sta_info { >> > s8 last_ack_signal; >> > bool ack_signal_filled; >> > struct ewma_avg_signal avg_ack_signal; >> > + struct ewma_avg_est_tp 
avg_est_tp; >> > } status_stats; >> > >> > /* Updated from TX path only, no locking requirements */ >> > diff --git a/net/mac80211/status.c b/net/mac80211/status.c >> > index e81e8a5bb774..647ade3719f5 100644 >> > --- a/net/mac80211/status.c >> > +++ b/net/mac80211/status.c >> > @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, >> > sta->status_stats.retry_failed++; >> > sta->status_stats.retry_count += retry_count; >> > >> > + if (skb && tx_time_est) { >> >> Shouldn't this be conditioned on actually being used (i.e., existence of >> get_expected_throughput op? Also maybe pull it out into its own function >> to make it clear what it's doing... > > It's already the case I think, since the tx_time_est is not-zero only > when actually an estimated time is set. > A dedicated function seems good for me, for clarity, yes. > >> >> > + /* max average packet size */ >> > + size_t pkt_size = skb->len > 1024 ? 1024 : skb->len; >> > + >> > + if (acked) { >> > + /* ACK packet size */ >> > + pkt_size += 14; >> > + /* SIFS x 2 */ >> > + tx_time_est += 2 * 2; >> > + } >> > + >> > + /* Backoff average x retries */ >> > + tx_time_est += retry_count ? retry_count * 2 : 2; >> > + >> > + /* failed tx */ >> > + if (!acked && !noack_success) >> > + pkt_size = 0; >> > + >> > + ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp, >> > + ((pkt_size * 8) * 1000) / tx_time_est); >> >> Could we avoid adding this division in the fast path? > > Maybe we can use the do_div() macro for optimization, I don't see how > we can avoid it. 
Here's one idea on how to make it less expensive and improve the average of the throughput estimation at the same time:

In sta->status_stats add these fields:

	u32 last_tp_update;
	u32 pkt_size;
	u32 tx_time_est;

And in this part of the code you do something like this:

	u32 diff = ((u32)jiffies) - sta->status_stats.last_tp_update;

	sta->status_stats.pkt_size += (pkt_size * 8) * 1000;
	sta->status_stats.tx_time_est += tx_time_est;
	if (diff > HZ / 10) {
		ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp,
				    sta->status_stats.pkt_size /
				    sta->status_stats.tx_time_est);
		sta->status_stats.pkt_size = 0;
		sta->status_stats.tx_time_est = 0;
	}

This ensures that you will only get max. 1 update per 100ms interval. Does that make sense?

- Felix
Indeed it's less expensive. I'll try to make something in this direction to see what it looks like. - Baligh
On 25.05.22 15:37, Baligh GASMI wrote:
> Indeed it's less expensive.
>
> I'll try to make something in this direction to see what it looks like.

Thanks. Please also make sure that all of this extra work is not performed for drivers that don't need it, because they either implement .get_expected_throughput, or use minstrel, which also implements it in a better way.

- Felix
On 25.05.22 19:21, Dave Taht wrote:
> Sigh. Ideally we are trying to keep latency low by deeply understanding
> current conditions. Batching up anything on this large interval feels
> wrong. Powersave and beacons become "beating" frequencies.

Shorter intervals (or even doing it without batching at all) will likely lead to much more fluctuation, which could make the average less useful. I think batching it up will likely be beneficial for protocols like batman-adv.

- Felix
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4e2fc1a08681..4331b79647fa 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1142,6 +1142,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, trace_drv_get_expected_throughput(&sta->sta); if (local->ops->get_expected_throughput && sta->uploaded) ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); + else + ret = ewma_avg_est_tp_read(&sta->status_stats.avg_est_tp); trace_drv_return_u32(local, ret); return ret; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 379fd367197f..fe60be4c671d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -123,6 +123,7 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_STOP_CB 7 #define HT_AGG_STATE_SENT_ADDBA 8 +DECLARE_EWMA(avg_est_tp, 8, 16) DECLARE_EWMA(avg_signal, 10, 8) enum ieee80211_agg_stop_reason { AGG_STOP_DECLINED, @@ -641,6 +642,7 @@ struct sta_info { s8 last_ack_signal; bool ack_signal_filled; struct ewma_avg_signal avg_ack_signal; + struct ewma_avg_est_tp avg_est_tp; } status_stats; /* Updated from TX path only, no locking requirements */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e81e8a5bb774..647ade3719f5 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1145,6 +1145,28 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sta->status_stats.retry_failed++; sta->status_stats.retry_count += retry_count; + if (skb && tx_time_est) { + /* max average packet size */ + size_t pkt_size = skb->len > 1024 ? 1024 : skb->len; + + if (acked) { + /* ACK packet size */ + pkt_size += 14; + /* SIFS x 2 */ + tx_time_est += 2 * 2; + } + + /* Backoff average x retries */ + tx_time_est += retry_count ? 
retry_count * 2 : 2; + + /* failed tx */ + if (!acked && !noack_success) + pkt_size = 0; + + ewma_avg_est_tp_add(&sta->status_stats.avg_est_tp, + ((pkt_size * 8) * 1000) / tx_time_est); + } + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (sdata->vif.type == NL80211_IFTYPE_STATION && skb && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b6b20f38de0e..d866a721690d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3793,7 +3793,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, IEEE80211_SKB_CB(skb)->control.vif = vif; if (vif && - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { + (!local->ops->get_expected_throughput || + wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))) { bool ampdu = txq->ac != IEEE80211_AC_VO; u32 airtime;
Since the integration of AQL, packet TX airtime estimation is calculated and counted to be used for the dequeue limit.

Use this estimated airtime to compute expected throughput for each station.

It will be a generic mac80211 implementation. If the driver has a get_expected_throughput implementation, it will be used instead.

Useful for L2 routing protocols, like B.A.T.M.A.N.

Signed-off-by: Baligh Gasmi <gasmibal@gmail.com>
---
 net/mac80211/driver-ops.h |  2 ++
 net/mac80211/sta_info.h   |  2 ++
 net/mac80211/status.c     | 22 ++++++++++++++++++++++
 net/mac80211/tx.c         |  3 ++-
 4 files changed, 28 insertions(+), 1 deletion(-)