[v3,3/6] mac80211: Add airtime accounting and scheduling to TXQs
diff mbox series

Message ID 1542063113-22438-4-git-send-email-rmanohar@codeaurora.org
State New
Headers show
Series
  • Move TXQ scheduling and airtime fairness into mac80211
Related show

Commit Message

Rajkumar Manoharan Nov. 12, 2018, 10:51 p.m. UTC
From: Toke Høiland-Jørgensen <toke@toke.dk>

This adds airtime accounting and scheduling to the mac80211 TXQ
scheduler. A new callback, ieee80211_sta_register_airtime(), is added
that drivers can call to report airtime usage for stations.

When airtime information is present, mac80211 will schedule TXQs
(through ieee80211_next_txq()) in a way that enforces airtime fairness
between active stations. This scheduling works the same way as the ath9k
in-driver airtime fairness scheduling. If no airtime usage is reported
by the driver, the scheduler will default to round-robin scheduling.

For drivers that don't control TXQ scheduling in software, a new API
function, ieee80211_txq_may_transmit(), is added which the driver can use
to check if the TXQ is eligible for transmission, or should be throttled to
enforce fairness. Calls to this function must also be enclosed in
ieee80211_txq_schedule_{start,end}() calls to ensure proper locking.

The API ieee80211_txq_may_transmit() also ensures that TXQ list will be
aligned against driver's own round-robin scheduler list, i.e. it rotates
the TXQ list until the requested node becomes the first entry
in TXQ list. Thus both the TXQ list and driver's list are in sync.

Co-Developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
---
 include/net/mac80211.h     | 59 ++++++++++++++++++++++++++++++
 net/mac80211/cfg.c         |  3 ++
 net/mac80211/debugfs.c     |  3 ++
 net/mac80211/debugfs_sta.c | 50 ++++++++++++++++++++++++--
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/main.c        |  4 +++
 net/mac80211/sta_info.c    | 44 +++++++++++++++++++++--
 net/mac80211/sta_info.h    | 13 +++++++
 net/mac80211/status.c      |  6 ++++
 net/mac80211/tx.c          | 90 +++++++++++++++++++++++++++++++++++++++++++---
 10 files changed, 264 insertions(+), 10 deletions(-)

Comments

Felix Fietkau Nov. 14, 2018, 10:57 a.m. UTC | #1
On 2018-11-12 23:51, Rajkumar Manoharan wrote:
> From: Toke Høiland-Jørgensen <toke@toke.dk>
> 
> This adds airtime accounting and scheduling to the mac80211 TXQ
> scheduler. A new callback, ieee80211_sta_register_airtime(), is added
> that drivers can call to report airtime usage for stations.
> 
> When airtime information is present, mac80211 will schedule TXQs
> (through ieee80211_next_txq()) in a way that enforces airtime fairness
> between active stations. This scheduling works the same way as the ath9k
> in-driver airtime fairness scheduling. If no airtime usage is reported
> by the driver, the scheduler will default to round-robin scheduling.
> 
> For drivers that don't control TXQ scheduling in software, a new API
> function, ieee80211_txq_may_transmit(), is added which the driver can use
> to check if the TXQ is eligible for transmission, or should be throttled to
> enforce fairness. Calls to this function must also be enclosed in
> ieee80211_txq_schedule_{start,end}() calls to ensure proper locking.
> 
> The API ieee80211_txq_may_transmit() also ensures that TXQ list will be
> aligned aginst driver's own round-robin scheduler list. i.e it rotates
> the TXQ list till it makes the requested node becomes the first entry
> in TXQ list. Thus both the TXQ list and driver's list are in sync.
> 
> Co-Developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
> ---
>  include/net/mac80211.h     | 59 ++++++++++++++++++++++++++++++
>  net/mac80211/cfg.c         |  3 ++
>  net/mac80211/debugfs.c     |  3 ++
>  net/mac80211/debugfs_sta.c | 50 ++++++++++++++++++++++++--
>  net/mac80211/ieee80211_i.h |  2 ++
>  net/mac80211/main.c        |  4 +++
>  net/mac80211/sta_info.c    | 44 +++++++++++++++++++++--
>  net/mac80211/sta_info.h    | 13 +++++++
>  net/mac80211/status.c      |  6 ++++
>  net/mac80211/tx.c          | 90 +++++++++++++++++++++++++++++++++++++++++++---
>  10 files changed, 264 insertions(+), 10 deletions(-)
> 
> diff --git a/net/mac80211/status.c b/net/mac80211/status.c
> index aa4afbf0abaf..a1f1256448f5 100644
> --- a/net/mac80211/status.c
> +++ b/net/mac80211/status.c
> @@ -818,6 +818,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
>  			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
>  						acked, info->status.tx_time);
>  
> +		if (info->status.tx_time &&
> +		    wiphy_ext_feature_isset(local->hw.wiphy,
> +					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
> +			ieee80211_sta_register_airtime(&sta->sta, tid,
> +						       info->status.tx_time, 0);
> +
>  		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
>  			if (info->flags & IEEE80211_TX_STAT_ACK) {
>  				if (sta->status_stats.lost_packets)
I think the same is needed in ieee80211_tx_status_ext.

> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index 305965283506..3f417e80e041 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -3660,12 +3680,74 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>  
>  	if (list_empty(&txqi->schedule_order) &&
> -	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets))
> -		list_add_tail(&txqi->schedule_order,
> -			      &local->active_txqs[txq->ac]);
> +	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
> +		/* If airtime accounting is active, always enqueue STAs at the
> +		 * head of the list to ensure that they only get moved to the
> +		 * back by the airtime DRR scheduler once they have a negative
> +		 * deficit. A station that already has a negative deficit will
> +		 * get immediately moved to the back of the list on the next
> +		 * call to ieee80211_next_txq().
> +		 */
> +		if (txqi->txq.sta &&
> +		    wiphy_ext_feature_isset(local->hw.wiphy,
> +					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
> +			list_add(&txqi->schedule_order,
> +				 &local->active_txqs[txq->ac]);
> +		else
> +			list_add_tail(&txqi->schedule_order,
> +				      &local->active_txqs[txq->ac]);
> +	}
>  }
This part doesn't really make much sense to me, but maybe I'm
misunderstanding how the code works.
Let's assume we have a driver like ath9k or mt76, which tries to keep a
number of aggregates in the hardware queue, and the hardware queue is
currently empty.
If the current txq entry is kept at the head of the schedule list,
wouldn't the code just pull from that one over and over again, until
enough packets are transmitted by the hardware and their tx status
processed?
It seems to me that while fairness is still preserved in the long run,
this could lead to rather bursty scheduling, which may not be
particularly latency friendly.

- Felix
Toke Høiland-Jørgensen Nov. 14, 2018, 5:40 p.m. UTC | #2
Felix Fietkau <nbd@nbd.name> writes:

> On 2018-11-12 23:51, Rajkumar Manoharan wrote:
>> From: Toke Høiland-Jørgensen <toke@toke.dk>
>> 
>> This adds airtime accounting and scheduling to the mac80211 TXQ
>> scheduler. A new callback, ieee80211_sta_register_airtime(), is added
>> that drivers can call to report airtime usage for stations.
>> 
>> When airtime information is present, mac80211 will schedule TXQs
>> (through ieee80211_next_txq()) in a way that enforces airtime fairness
>> between active stations. This scheduling works the same way as the ath9k
>> in-driver airtime fairness scheduling. If no airtime usage is reported
>> by the driver, the scheduler will default to round-robin scheduling.
>> 
>> For drivers that don't control TXQ scheduling in software, a new API
>> function, ieee80211_txq_may_transmit(), is added which the driver can use
>> to check if the TXQ is eligible for transmission, or should be throttled to
>> enforce fairness. Calls to this function must also be enclosed in
>> ieee80211_txq_schedule_{start,end}() calls to ensure proper locking.
>> 
>> The API ieee80211_txq_may_transmit() also ensures that TXQ list will be
>> aligned aginst driver's own round-robin scheduler list. i.e it rotates
>> the TXQ list till it makes the requested node becomes the first entry
>> in TXQ list. Thus both the TXQ list and driver's list are in sync.
>> 
>> Co-Developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
>> ---
>>  include/net/mac80211.h     | 59 ++++++++++++++++++++++++++++++
>>  net/mac80211/cfg.c         |  3 ++
>>  net/mac80211/debugfs.c     |  3 ++
>>  net/mac80211/debugfs_sta.c | 50 ++++++++++++++++++++++++--
>>  net/mac80211/ieee80211_i.h |  2 ++
>>  net/mac80211/main.c        |  4 +++
>>  net/mac80211/sta_info.c    | 44 +++++++++++++++++++++--
>>  net/mac80211/sta_info.h    | 13 +++++++
>>  net/mac80211/status.c      |  6 ++++
>>  net/mac80211/tx.c          | 90 +++++++++++++++++++++++++++++++++++++++++++---
>>  10 files changed, 264 insertions(+), 10 deletions(-)
>> 
>> diff --git a/net/mac80211/status.c b/net/mac80211/status.c
>> index aa4afbf0abaf..a1f1256448f5 100644
>> --- a/net/mac80211/status.c
>> +++ b/net/mac80211/status.c
>> @@ -818,6 +818,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
>>  			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
>>  						acked, info->status.tx_time);
>>  
>> +		if (info->status.tx_time &&
>> +		    wiphy_ext_feature_isset(local->hw.wiphy,
>> +					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
>> +			ieee80211_sta_register_airtime(&sta->sta, tid,
>> +						       info->status.tx_time, 0);
>> +
>>  		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
>>  			if (info->flags & IEEE80211_TX_STAT_ACK) {
>>  				if (sta->status_stats.lost_packets)
> I think the same is needed in ieee80211_tx_status_ext.

Right, good point.

>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>> index 305965283506..3f417e80e041 100644
>> --- a/net/mac80211/tx.c
>> +++ b/net/mac80211/tx.c
>> @@ -3660,12 +3680,74 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>  
>>  	if (list_empty(&txqi->schedule_order) &&
>> -	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets))
>> -		list_add_tail(&txqi->schedule_order,
>> -			      &local->active_txqs[txq->ac]);
>> +	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
>> +		/* If airtime accounting is active, always enqueue STAs at the
>> +		 * head of the list to ensure that they only get moved to the
>> +		 * back by the airtime DRR scheduler once they have a negative
>> +		 * deficit. A station that already has a negative deficit will
>> +		 * get immediately moved to the back of the list on the next
>> +		 * call to ieee80211_next_txq().
>> +		 */
>> +		if (txqi->txq.sta &&
>> +		    wiphy_ext_feature_isset(local->hw.wiphy,
>> +					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
>> +			list_add(&txqi->schedule_order,
>> +				 &local->active_txqs[txq->ac]);
>> +		else
>> +			list_add_tail(&txqi->schedule_order,
>> +				      &local->active_txqs[txq->ac]);
>> +	}
>>  }
> This part doesn't really make much sense to me, but maybe I'm
> misunderstanding how the code works.
> Let's assume we have a driver like ath9k or mt76, which tries to keep a
> number of aggregates in the hardware queue, and the hardware queue is
> currently empty.
> If the current txq entry is kept at the head of the schedule list,
> wouldn't the code just pull from that one over and over again, until
> enough packets are transmitted by the hardware and their tx status
> processed?
> It seems to me that while fairness is still preserved in the long run,
> this could lead to rather bursty scheduling, which may not be
> particularly latency friendly.

Yes, it'll be a bit more bursty when the hardware queue is completely
empty. However, when a TX completion comes back, that will adjust the
deficit of that sta and cause it to be rotated on the next dequeue. This
obviously relies on the fact that the lower-level hardware queue is
sufficiently shallow to not add a lot of latency. But we want that to be
the case anyway. In practice, it works quite well for ath9k, but not so
well for ath10k because it has a large buffer in firmware.

If we requeue the TXQ at the end of the list, a station that is taking
up too much airtime will fail to be throttled properly, so the
queue-at-head is kinda needed to ensure fairness...

-Toke
Louie Lu Nov. 15, 2018, 8:18 a.m. UTC | #3
Hi Rajkumar, Toke,

I found that the series (v3,4/6) removes the debugfs method for resetting
a station's airtime, and it was not added back here.

Not sure how to help this kind of situation, do I need a separate
patch to fix this, or posting the patch here is fine?

----

From 3a4a856c397345311c9d7f3679828cadc40e6a80 Mon Sep 17 00:00:00 2001
From: Louie Lu <git@louie.lu>
Date: Thu, 15 Nov 2018 16:13:57 +0800
Subject: [PATCH] mac80211: Add reset for station's airtime

Let the user reset a station's airtime status through debugfs: this
resets every airtime deficit to `sta->airtime_weight` and resets the
accumulated rx/tx airtime to 0.

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 446908ab3f5d..d84d2369a76e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -233,7 +233,28 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
     kfree(buf);
     return rv;
 }
-STA_OPS(airtime);
+
+/*
+ * FIXME: This *only* reset station airtime, didn't accept input
+ */
+static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+                                 size_t count, loff_t *ppos)
+{
+    struct sta_info *sta = file->private_data;
+    struct ieee80211_local *local = sta->sdata->local;
+    int ac;
+
+    for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+        spin_lock_bh(&local->active_txq_lock[ac]);
+        sta->airtime[ac].rx_airtime = 0;
+        sta->airtime[ac].tx_airtime = 0;
+        sta->airtime[ac].deficit = sta->airtime_weight;
+        spin_unlock_bh(&local->active_txq_lock[ac]);
+    }
+
+    return count;
+}
+STA_OPS_RW(airtime);

 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
                     size_t count, loff_t *ppos)
Felix Fietkau Nov. 15, 2018, 11:09 a.m. UTC | #4
On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>> This part doesn't really make much sense to me, but maybe I'm
>> misunderstanding how the code works.
>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>> number of aggregates in the hardware queue, and the hardware queue is
>> currently empty.
>> If the current txq entry is kept at the head of the schedule list,
>> wouldn't the code just pull from that one over and over again, until
>> enough packets are transmitted by the hardware and their tx status
>> processed?
>> It seems to me that while fairness is still preserved in the long run,
>> this could lead to rather bursty scheduling, which may not be
>> particularly latency friendly.
> 
> Yes, it'll be a bit more bursty when the hardware queue is completely
> empty. However, when a TX completion comes back, that will adjust the
> deficit of that sta and cause it to be rotated on the next dequeue. This
> obviously relies on the fact that the lower-level hardware queue is
> sufficiently shallow to not add a lot of latency. But we want that to be
> the case anyway. In practice, it works quite well for ath9k, but not so
> well for ath10k because it has a large buffer in firmware.
> 
> If we requeue the TXQ at the end of the list, a station that is taking
> up too much airtime will fail to be throttled properly, so the
> queue-at-head is kinda needed to ensure fairness...
Thanks for the explanation, that makes sense to me. I have an idea on
how to mitigate the burstiness within the driver. I'll write it down in
pseudocode, please let me know if you think that'll work.

do {
	struct ieee80211_txq *pending_txq[4];
	int n_pending_txq = 0;
	int i;

	if (hwq->pending < 4)
		break;

	nframes = 0;

	ieee80211_txq_schedule_start(hw, ac)
	do {
		bool requeue = false;

		struct ieee80211_txq *txq;

		txq = ieee80211_next_txq(hw, ac);
		if (!txq)
			break;

		nframes += schedule_txq(txq, &requeue);
		if (requeue)
			pending_txq[n_pending_txq++] = txq;

	} while (n_pending_txq < ARRAY_SIZE(pending_txq));

	for (i = n_pending_txq; i > 0; i--)
		ieee80211_return_txq(hw, pending_txq[i - 1]);

	ieee80211_txq_schedule_end(hw, ac)
} while (nframes);

- Felix
Toke Høiland-Jørgensen Nov. 15, 2018, 5:10 p.m. UTC | #5
Louie Lu <me@louie.lu> writes:

> Hi Rajkumar, Toke,
>
> I found the series (v3,4/6) remove the debugfs remove reset station's
> airtime method, and didn't added at here.
>
> Not sure how to help this kind of situation, do I need a separate
> patch to fix this, or posting the patch here is fine?

This is fine; we can fold it into the next version. Thanks :)

-Toke
Toke Høiland-Jørgensen Nov. 15, 2018, 5:24 p.m. UTC | #6
Felix Fietkau <nbd@nbd.name> writes:

> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>> This part doesn't really make much sense to me, but maybe I'm
>>> misunderstanding how the code works.
>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>> number of aggregates in the hardware queue, and the hardware queue is
>>> currently empty.
>>> If the current txq entry is kept at the head of the schedule list,
>>> wouldn't the code just pull from that one over and over again, until
>>> enough packets are transmitted by the hardware and their tx status
>>> processed?
>>> It seems to me that while fairness is still preserved in the long run,
>>> this could lead to rather bursty scheduling, which may not be
>>> particularly latency friendly.
>> 
>> Yes, it'll be a bit more bursty when the hardware queue is completely
>> empty. However, when a TX completion comes back, that will adjust the
>> deficit of that sta and cause it to be rotated on the next dequeue. This
>> obviously relies on the fact that the lower-level hardware queue is
>> sufficiently shallow to not add a lot of latency. But we want that to be
>> the case anyway. In practice, it works quite well for ath9k, but not so
>> well for ath10k because it has a large buffer in firmware.
>> 
>> If we requeue the TXQ at the end of the list, a station that is taking
>> up too much airtime will fail to be throttled properly, so the
>> queue-at-head is kinda needed to ensure fairness...
> Thanks for the explanation, that makes sense to me. I have an idea on
> how to mitigate the burstiness within the driver. I'll write it down in
> pseudocode, please let me know if you think that'll work.

I don't think it will, unfortunately. For example, consider the case
where there are two stations queued; one with a large negative deficit
(say, -10ms), and one with a positive deficit.

In this case, we really need to throttle the station with a negative
deficit. But if the driver loops and caches txqs, we'll get something
like the following:

- First driver loop iteration: returns TXQ with positive deficit.
- Second driver loop iteration: Only the negative-deficit TXQ is in the
  mac80211 list, so it will loop until that TXQ's deficit turns positive
  and return it.

Because of this, the negative-deficit station won't be throttled, and we
won't get fairness.

How many frames will mt76 queue up below the driver point? I.e., how
much burstiness are you expecting this will introduce on that driver?

Taking a step back, it's clear that it would be good to be able to
dequeue packets to multiple STAs at once (we need that for MU-MIMO on
ath10k as well). However, I don't think we can do that with the
round-robin fairness scheduler; so we are going to need a different
algorithm. I *think* it may be possible to do this with a virtual-time
scheduler, but I haven't sat down and worked out the details yet...

-Toke
Dave Taht Nov. 19, 2018, 5:55 p.m. UTC | #7
Toke Høiland-Jørgensen <toke@toke.dk> writes:

> Felix Fietkau <nbd@nbd.name> writes:
>
>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>>> This part doesn't really make much sense to me, but maybe I'm
>>>> misunderstanding how the code works.
>>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>>> number of aggregates in the hardware queue, and the hardware queue is
>>>> currently empty.
>>>> If the current txq entry is kept at the head of the schedule list,
>>>> wouldn't the code just pull from that one over and over again, until
>>>> enough packets are transmitted by the hardware and their tx status
>>>> processed?
>>>> It seems to me that while fairness is still preserved in the long run,
>>>> this could lead to rather bursty scheduling, which may not be
>>>> particularly latency friendly.
>>> 
>>> Yes, it'll be a bit more bursty when the hardware queue is completely
>>> empty. However, when a TX completion comes back, that will adjust the
>>> deficit of that sta and cause it to be rotated on the next dequeue. This
>>> obviously relies on the fact that the lower-level hardware queue is
>>> sufficiently shallow to not add a lot of latency. But we want that to be
>>> the case anyway. In practice, it works quite well for ath9k, but not so
>>> well for ath10k because it has a large buffer in firmware.
>>> 
>>> If we requeue the TXQ at the end of the list, a station that is taking
>>> up too much airtime will fail to be throttled properly, so the
>>> queue-at-head is kinda needed to ensure fairness...
>> Thanks for the explanation, that makes sense to me. I have an idea on
>> how to mitigate the burstiness within the driver. I'll write it down in
>> pseudocode, please let me know if you think that'll work.
>
> I don't think it will, unfortunately. For example, consider the case
> where there are two stations queued; one with a large negative deficit
> (say, -10ms), and one with a positive deficit.

Perhaps a flag for one way or the other?

if(driver->has_absurd_hardware_queue_depth) doitthisway(); else
doitabetterway();

>
> In this case, we really need to throttle the station with a negative
> deficit. But if the driver loops and caches txqs, we'll get something
> like the following:
>
> - First driver loop iteration: returns TXQ with positive deficit.
> - Second driver loop iteration: Only the negative-deficit TXQ is in the
>   mac80211 list, so it will loop until that TXQ's deficit turns positive
>   and return it.
>
> Because of this, the negative-deficit station won't be throttled, and we
> won't get fairness.
>
> How many frames will mt76 queue up below the driver point? I.e., how
> much burstiness are you expecting this will introduce on that driver?
>
> Taking a step back, it's clear that it would be good to be able to
> dequeue packets to multiple STAs at once (we need that for MU-MIMO on
> ath10k as well). However, I don't think we can do that with the
> round-robin fairness scheduler; so we are going to need a different
> algorithm. I *think* it may be possible to do this with a virtual-time
> scheduler, but I haven't sat down and worked out the details yet...

The answer to which did not fit on the margins of your thesis. :)

I too have been trying to come up with a better means of gang
scheduling... for about 2 years now. In terms of bitmaps it looks a bit
like QFQ, but honestly...

Is there going to be some point where whatever we have here is
significantly better than what we had? Or not significantly worse? Or
handwavy enough to fix the rest once enlightenment arrives?

The perfect is the enemy of the good.

I'd rather like the intel folk to be weighing in on this stuff, too,
trying to get an API right requires use cases.

>
> -Toke
> _______________________________________________
> Make-wifi-fast mailing list
> Make-wifi-fast@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/make-wifi-fast
Toke Høiland-Jørgensen Nov. 19, 2018, 10:44 p.m. UTC | #8
Dave Taht <dave@taht.net> writes:

> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>
>> Felix Fietkau <nbd@nbd.name> writes:
>>
>>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>>>> This part doesn't really make much sense to me, but maybe I'm
>>>>> misunderstanding how the code works.
>>>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>>>> number of aggregates in the hardware queue, and the hardware queue is
>>>>> currently empty.
>>>>> If the current txq entry is kept at the head of the schedule list,
>>>>> wouldn't the code just pull from that one over and over again, until
>>>>> enough packets are transmitted by the hardware and their tx status
>>>>> processed?
>>>>> It seems to me that while fairness is still preserved in the long run,
>>>>> this could lead to rather bursty scheduling, which may not be
>>>>> particularly latency friendly.
>>>> 
>>>> Yes, it'll be a bit more bursty when the hardware queue is completely
>>>> empty. However, when a TX completion comes back, that will adjust the
>>>> deficit of that sta and cause it to be rotated on the next dequeue. This
>>>> obviously relies on the fact that the lower-level hardware queue is
>>>> sufficiently shallow to not add a lot of latency. But we want that to be
>>>> the case anyway. In practice, it works quite well for ath9k, but not so
>>>> well for ath10k because it has a large buffer in firmware.
>>>> 
>>>> If we requeue the TXQ at the end of the list, a station that is taking
>>>> up too much airtime will fail to be throttled properly, so the
>>>> queue-at-head is kinda needed to ensure fairness...
>>> Thanks for the explanation, that makes sense to me. I have an idea on
>>> how to mitigate the burstiness within the driver. I'll write it down in
>>> pseudocode, please let me know if you think that'll work.
>>
>> I don't think it will, unfortunately. For example, consider the case
>> where there are two stations queued; one with a large negative deficit
>> (say, -10ms), and one with a positive deficit.
>
> Perhaps a flag for one way or the other?
>
> if(driver->has_absurd_hardware_queue_depth) doitthisway(); else
> doitabetterway();

Well, there's going to be a BQL-like queue limit (but for airtime) on
top, which drivers can opt-in to if the hardware has too much queueing.

>> In this case, we really need to throttle the station with a negative
>> deficit. But if the driver loops and caches txqs, we'll get something
>> like the following:
>>
>> - First driver loop iteration: returns TXQ with positive deficit.
>> - Second driver loop iteration: Only the negative-deficit TXQ is in the
>>   mac80211 list, so it will loop until that TXQ's deficit turns positive
>>   and return it.
>>
>> Because of this, the negative-deficit station won't be throttled, and we
>> won't get fairness.
>>
>> How many frames will mt76 queue up below the driver point? I.e., how
>> much burstiness are you expecting this will introduce on that driver?
>>
>> Taking a step back, it's clear that it would be good to be able to
>> dequeue packets to multiple STAs at once (we need that for MU-MIMO on
>> ath10k as well). However, I don't think we can do that with the
>> round-robin fairness scheduler; so we are going to need a different
>> algorithm. I *think* it may be possible to do this with a virtual-time
>> scheduler, but I haven't sat down and worked out the details yet...
>
> The answer to which did not fit on the margins of your thesis. :)
>
> I too have been trying to come up with a better means of gang
> scheduling... for about 2 years now. In terms of bitmaps it looks a bit
> like QFQ, but honestly...

It's not the gang scheduling we need, deciding which devices to send to
at once is generally done in firmware anyway. We just need to be able to
dequeue packets for more than one station when possible. I don't think
we need the fancy bitmap stuff from QFQ since we don't have that many
stations to schedule at once; so we can probably live with O(log(n)) in
the number of active stations.

> Is there going to be some point where whatever we have here is
> significantly better than what we had? Or not significantly worse? Or
> handwavy enough to fix the rest once enlightenment arrives?
>
> The perfect is the enemy of the good.

Well, what we have now works for ath9k, works reasonably well for ath10k
in pull mode, not so well for ath10k in push mode, and then there's
Felix' comments in this thread...

> I'd rather like the intel folk to be weighing in on this stuff, too,
> trying to get an API right requires use cases.

Johannes has already reviewed a previous version, and I do believe he
said he'd review it again once we have converged on something :)

-Toke
Toke Høiland-Jørgensen Nov. 19, 2018, 11:02 p.m. UTC | #9
Hi Felix

Thinking a bit more about this, I think that rather than having the
driver work around the API as in your example...

> do {
> 	struct ieee80211_txq *pending_txq[4];
> 	int n_pending_txq = 0;
> 	int i;
>
> 	if (hwq->pending < 4)
> 		break;
>
> 	nframes = 0;
>
> 	ieee80211_txq_schedule_start(hw, ac)
> 	do {
> 		bool requeue = false;
>
> 		struct ieee80211_txq *txq;
>
> 		txq = ieee80211_next_txq(hw, ac);
> 		if (!txq)
> 			break;
>
> 		nframes += schedule_txq(txq, &requeue);
> 		if (requeue)
> 			pending_txq[n_pending_txq++] = txq;
>
> 	} while (n_pending_txq < ARRAY_SIZE(pending_txq));
>
> 	for (i = n_pending_txq; i > 0; i--)
> 		ieee80211_return_txq(hw, pending_txq[i - 1]);
>
> 	ieee80211_txq_schedule_end(hw, ac)
> } while (nframes);

... really what we want is that the driver can just do this:

ieee80211_txq_schedule_start(hw, ac);
while ((txq = ieee80211_next_txq(hw, ac))) {
	schedule_txq(txq, &requeue);
        return_txq(txq);
}
ieee80211_txq_schedule_end(hw, ac);

and expect to get through all eligible TXQs. Note that there will be
cases where there is only a single eligible TXQ (such as the example I
gave in the other email); in which case the current version is fine. But
there is (probably) also going to be cases where more than one TXQ is
eligible at the same time, which we cannot handle with the current RR
scheduler.

However, I think that assuming we can get the scheduler to guarantee
that it will return all eligible TXQs between each pair of calls to
schedule_start()/schedule_end(), we should be fine with the current API.
Do you agree with this?

-Toke
Dave Taht Nov. 19, 2018, 11:30 p.m. UTC | #10
Toke Høiland-Jørgensen <toke@toke.dk> writes:

> Dave Taht <dave@taht.net> writes:
>
>> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>>
>>> Felix Fietkau <nbd@nbd.name> writes:
>>>
>>>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>>>>> This part doesn't really make much sense to me, but maybe I'm
>>>>>> misunderstanding how the code works.
>>>>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>>>>> number of aggregates in the hardware queue, and the hardware queue is
>>>>>> currently empty.
>>>>>> If the current txq entry is kept at the head of the schedule list,
>>>>>> wouldn't the code just pull from that one over and over again, until
>>>>>> enough packets are transmitted by the hardware and their tx status
>>>>>> processed?
>>>>>> It seems to me that while fairness is still preserved in the long run,
>>>>>> this could lead to rather bursty scheduling, which may not be
>>>>>> particularly latency friendly.
>>>>> 
>>>>> Yes, it'll be a bit more bursty when the hardware queue is completely
>>>>> empty. However, when a TX completion comes back, that will adjust the
>>>>> deficit of that sta and cause it to be rotated on the next dequeue. This
>>>>> obviously relies on the fact that the lower-level hardware queue is
>>>>> sufficiently shallow to not add a lot of latency. But we want that to be
>>>>> the case anyway. In practice, it works quite well for ath9k, but not so
>>>>> well for ath10k because it has a large buffer in firmware.
>>>>> 
>>>>> If we requeue the TXQ at the end of the list, a station that is taking
>>>>> up too much airtime will fail to be throttled properly, so the
>>>>> queue-at-head is kinda needed to ensure fairness...
>>>> Thanks for the explanation, that makes sense to me. I have an idea on
>>>> how to mitigate the burstiness within the driver. I'll write it down in
>>>> pseudocode, please let me know if you think that'll work.
>>>
>>> I don't think it will, unfortunately. For example, consider the case
>>> where there are two stations queued; one with a large negative deficit
>>> (say, -10ms), and one with a positive deficit.
>>
>> Perhaps a flag for one way or the other?
>>
>> if(driver->has_absurd_hardware_queue_depth) doitthisway(); else
>> doitabetterway();
>
> Well, there's going to be a BQL-like queue limit (but for airtime) on
> top, which drivers can opt-in to if the hardware has too much queueing.
>
>>> In this case, we really need to throttle the station with a negative
>>> deficit. But if the driver loops and caches txqs, we'll get something
>>> like the following:
>>>
>>> - First driver loop iteration: returns TXQ with positive deficit.
>>> - Second driver loop iteration: Only the negative-deficit TXQ is in the
>>>   mac80211 list, so it will loop until that TXQ's deficit turns positive
>>>   and return it.
>>>
>>> Because of this, the negative-deficit station won't be throttled, and we
>>> won't get fairness.
>>>
>>> How many frames will mt76 queue up below the driver point? I.e., how
>>> much burstiness are you expecting this will introduce on that driver?
>>>
>>> Taking a step back, it's clear that it would be good to be able to
>>> dequeue packets to multiple STAs at once (we need that for MU-MIMO on
>>> ath10k as well). However, I don't think we can do that with the
>>> round-robin fairness scheduler; so we are going to need a different
>>> algorithm. I *think* it may be possible to do this with a virtual-time
>>> scheduler, but I haven't sat down and worked out the details yet...
>>
>> The answer to which did not fit on the margins of your thesis. :)
>>
>> I too have been trying to come up with a better means of gang
>> scheduling... for about 2 years now. In terms of bitmaps it looks a bit
>> like QFQ, but honestly...
>
> It's not the gang scheduling we need, deciding which devices to send to
> at once is generally done in firmware anyway.

I have a long held dream that one day some firmware will be able to send
an interrupt and some information along...

"Hi, I'll be done transmitting/receiving in about 1ms, here's who I
think I can talk to next, and here's who else I maybe could gang schedule".

That would let us get away from 5ms wasted in the "ready to go" portion
of the algo, and share the highest likelihood "groups" with the higher
layer.

> We just need to be able to
> dequeue packets for more than one station when possible.

And a huge fantasy is in some future 802.11ZZZ standard the on-board firmware and
the linux drivers can be co-designed, even, dare I say, open sourced, to
better evolve to meet real world requirements.

mbox's per station would be nice, with scatter/gather I/O... I
can think of a zillion things I'd want the firmware to handle (other
than buffering)

> I don't think
> we need the fancy bitmap stuff from QFQ since we don't have that many
> stations to schedule at once; so we can probably live with O(log(n)) in
> the number of active stations.

Best of two or three "groups", per above, from the firmware.

>> Is there going to be some point where whatever we have here is
>> significantly better than what we had? Or not significantly worse? Or
>> handwavy enough to fix the rest once enlightenment arrives?
>>
>> The perfect is the enemy of the good.
>
> Well, what we have now works for ath9k, works reasonably well for ath10k
> in pull mode, not so well for ath10k in push mode, and then there's
> Felix' comments in this thread...

So how about, an ath10k in a friggin "co-operative" mode? 

What are the performance differences in ath10k in push mode? Why do we
care if this mode works at all? 

Perfect, versus "good".

>> I'd rather like the intel folk to be weighing in on this stuff, too,
>> trying to get an API right requires use cases.
>
> Johannes has already reviewed a previous version, and I do believe he
> said he'd review it again once we have converged on something :)

Would intel care if only the pull mode worked well on their hardware? Do
they have a pull or push mode?
Dave Taht Nov. 19, 2018, 11:47 p.m. UTC | #11
On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
>
>
>
> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
> Dave Taht <dave@taht.net> writes:
>
> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>
> Felix Fietkau <nbd@nbd.name> writes:
>
> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>
> This part doesn't really make much sense to me, but maybe I'm
> misunderstanding how the code works.
> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>
> ….
>
>
> Well, there's going to be a BQL-like queue limit (but for airtime) on
> top, which drivers can opt-in to if the hardware has too much queueing.
>
>
> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!

Michal faked up a dql estimator 3 (?) years ago. it worked.

http://blog.cerowrt.org/post/dql_on_wifi_2/

As a side note, in *any* real world working mu-mimo situation at any
scale, on any equipment, does anyone have any stats on how often the
feature is actually used and useful?

My personal guess, from looking at the standard, was in home
scenarios, usage would be about... 0, and in a controlled environment
in a football stadium, quite a lot.

In a office or apartment complex, I figured interference and so forth
would make it a negative benefit due to retransmits.

I felt when that part of the standard rolled around... that mu-mimo
was an idea that should never have escaped the lab. I can be convinced
by data, that we can aim for a higher goal here. But it would be
comforting to have a measured non-lab, real-world, at real world
rates, result for it, on some platform, of it actually being useful.

> Simon
>
> _______________________________________________
> Make-wifi-fast mailing list
> Make-wifi-fast@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/make-wifi-fast
Ben Greear Nov. 19, 2018, 11:56 p.m. UTC | #12
On 11/19/2018 03:47 PM, Dave Taht wrote:
> On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
>>
>>
>>
>> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>
>> Dave Taht <dave@taht.net> writes:
>>
>> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>>
>> Felix Fietkau <nbd@nbd.name> writes:
>>
>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>
>> This part doesn't really make much sense to me, but maybe I'm
>> misunderstanding how the code works.
>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>
>> ….
>>
>>
>> Well, there's going to be a BQL-like queue limit (but for airtime) on
>> top, which drivers can opt-in to if the hardware has too much queueing.
>>
>>
>> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!
>
> Michal faked up a dql estimator 3 (?) years ago. it worked.
>
> http://blog.cerowrt.org/post/dql_on_wifi_2/
>
> As a side note, in *any* real world working mu-mimo situation at any
> scale, on any equipment, does anyone have any stats on how often the
> feature is actually used and useful?
>
> My personal guess, from looking at the standard, was in home
> scenarios, usage would be about... 0, and in a controlled environment
> in a football stadium, quite a lot.
>
> In a office or apartment complex, I figured interference and so forth
> would make it a negative benefit due to retransmits.
>
> I felt when that part of the standard rolled around... that mu-mimo
> was an idea that should never have escaped the lab. I can be convinced
> by data, that we can aim for a higher goal here. But it would be
> comforting to have a measured non-lab, real-world, at real world
> rates, result for it, on some platform, of it actually being useful.

We're working on building a lab with 20 or 30 mixed 'real' devices
using various different /AC NICs (QCA wave2 on OpenWRT, Fedora, realtek USB 8812au on OpenWRT, Fedora,
and some Intel NICs in NUCs on Windows, and maybe more).  I'm not actually sure if that realtek
  or the NUCs can do MU-MIMO or not, but the QCA NICs will be able to.  It should be at least somewhat similar
to a classroom environment or coffee shop.  I'll let you know what we find
as far as how well MU-MIMO improves things or not.

At least in simple test cases (one 1x1 station, one 2x2 station, with 4x4 MU-MIMO AP),
it works very well for increased download throughput.

In home setups, I'd guess that the DSL or Cable Modem or other uplink is the bottleneck
way more often than the wifi is, even if you are just running /n.  But, maybe that is just
my experience living out at the end of a long skinny phone line all these years.

Thanks,
Ben
Dave Taht Nov. 20, 2018, 12:13 a.m. UTC | #13
On Mon, Nov 19, 2018 at 3:56 PM Ben Greear <greearb@candelatech.com> wrote:
>
> On 11/19/2018 03:47 PM, Dave Taht wrote:
> > On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
> >>
> >>
> >>
> >> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
> >>
> >> Dave Taht <dave@taht.net> writes:
> >>
> >> Toke Høiland-Jørgensen <toke@toke.dk> writes:
> >>
> >> Felix Fietkau <nbd@nbd.name> writes:
> >>
> >> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
> >>
> >> This part doesn't really make much sense to me, but maybe I'm
> >> misunderstanding how the code works.
> >> Let's assume we have a driver like ath9k or mt76, which tries to keep a
> >>
> >> ….
> >>
> >>
> >> Well, there's going to be a BQL-like queue limit (but for airtime) on
> >> top, which drivers can opt-in to if the hardware has too much queueing.
> >>
> >>
> >> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!
> >
> > Michal faked up a dql estimator 3 (?) years ago. it worked.
> >
> > http://blog.cerowrt.org/post/dql_on_wifi_2/
> >
> > As a side note, in *any* real world working mu-mimo situation at any
> > scale, on any equipment, does anyone have any stats on how often the
> > feature is actually used and useful?
> >
> > My personal guess, from looking at the standard, was in home
> > scenarios, usage would be about... 0, and in a controlled environment
> > in a football stadium, quite a lot.
> >
> > In a office or apartment complex, I figured interference and so forth
> > would make it a negative benefit due to retransmits.
> >
> > I felt when that part of the standard rolled around... that mu-mimo
> > was an idea that should never have escaped the lab. I can be convinced
> > by data, that we can aim for a higher goal here. But it would be
> > comforting to have a measured non-lab, real-world, at real world
> > rates, result for it, on some platform, of it actually being useful.
>
> We're working on building a lab with 20 or 30 mixed 'real' devices
> using various different /AC NICs (QCA wave2 on OpenWRT, Fedora, realtek USB 8812au on OpenWRT, Fedora,
> and some Intel NICs in NUCs on Windows, and maybe more).  I'm not actually sure if that realtek
>   or the NUCs can do MU-MIMO or not, but the QCA NICs will be able to.  It should be at least somewhat similar
> to a classroom environment or coffee shop.

In the last 3 coffee shops I went to, I could hear over 30 APs on
competing SSIDs, running G, N, and AC,
occupying every available channel.

> I'll let you know what we find
> as far as how well MU-MIMO improves things or not.

Thank you. My lab is shut down and I'm selling off the gear, so all I
can do anymore is be grumpy.

>
> At least in simple test cases (one 1x1 stations, one 2x2 station, with 4x4 MU-MIMO AP),
> it works very well for increased download throughput.

Is that a UDP or TCP test?

My specific "most important" benchmark for wifi is web PLT with lots
of stations. That's the most common wifi use case, IMHO.
Some videoconferencing, voip, ssh, etc. One big upload, one big
download, going on somewhere, sometimes. Slashdot
for me is 78 separate ssl'd tcp flows, a dozen + dns lookups, over 2.2 seconds.

bulk downloads rarely figure in except for streaming video, and that
peaks out generally at 20Mbit/sec for most people.

So while I do like the potential in mu-mimo to schedule lower service
times on a per station basis, I see too many
variable rates and interference in the real world, and not enough
simultaneity between schedulable stations, and a ton of acks,
for me to imagine there's much of a real-world difference from mu-mimo
in anything other than a well managed office or stadium.

People going for the max download throughput with the max number of
stations... PLT, PLT is a way better benchmark.

I can certainly be convinced by data, and I look forward to your experiments.

> In home setups, I'd guess that the DSL or Cable Modem or other uplink is the bottleneck

Offices too. Wifi - aside from interference and range issues lowering
the rate, only becomes the bottleneck at internet speeds >
40Mbits/sec.

We are certainly starting to see wifi become more of the bottleneck,
while running at rates, generally, far lower than what people bench
at.

> way more often than the wifi is, even if your are just running /n.  But, maybe that is just
> my experience living out at the end of a long skinny phone line all these years.

Fiber networks and newer cable connections now crack 100mbits, and
there you do see all that nifty fq_codel-ly chocolatey goodness
kicking in....

but I do tend to think the optimization focus should be at low rates
with lots of stations for plt more than bandwidth.

> Thanks,
> Ben
>
>
>
> --
> Ben Greear <greearb@candelatech.com>
> Candela Technologies Inc  http://www.candelatech.com
>
Ben Greear Nov. 20, 2018, 12:20 a.m. UTC | #14
On 11/19/2018 04:13 PM, Dave Taht wrote:
> On Mon, Nov 19, 2018 at 3:56 PM Ben Greear <greearb@candelatech.com> wrote:
>>
>> On 11/19/2018 03:47 PM, Dave Taht wrote:
>>> On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
>>>>
>>>>
>>>>
>>>> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>>>
>>>> Dave Taht <dave@taht.net> writes:
>>>>
>>>> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>>>>
>>>> Felix Fietkau <nbd@nbd.name> writes:
>>>>
>>>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>>>>
>>>> This part doesn't really make much sense to me, but maybe I'm
>>>> misunderstanding how the code works.
>>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>>>>
>>>> ….
>>>>
>>>>
>>>> Well, there's going to be a BQL-like queue limit (but for airtime) on
>>>> top, which drivers can opt-in to if the hardware has too much queueing.
>>>>
>>>>
>>>> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!
>>>
>>> Michal faked up a dql estimator 3 (?) years ago. it worked.
>>>
>>> http://blog.cerowrt.org/post/dql_on_wifi_2/
>>>
>>> As a side note, in *any* real world working mu-mimo situation at any
>>> scale, on any equipment, does anyone have any stats on how often the
>>> feature is actually used and useful?
>>>
>>> My personal guess, from looking at the standard, was in home
>>> scenarios, usage would be about... 0, and in a controlled environment
>>> in a football stadium, quite a lot.
>>>
>>> In a office or apartment complex, I figured interference and so forth
>>> would make it a negative benefit due to retransmits.
>>>
>>> I felt when that part of the standard rolled around... that mu-mimo
>>> was an idea that should never have escaped the lab. I can be convinced
>>> by data, that we can aim for a higher goal here. But it would be
>>> comforting to have a measured non-lab, real-world, at real world
>>> rates, result for it, on some platform, of it actually being useful.
>>
>> We're working on building a lab with 20 or 30 mixed 'real' devices
>> using various different /AC NICs (QCA wave2 on OpenWRT, Fedora, realtek USB 8812au on OpenWRT, Fedora,
>> and some Intel NICs in NUCs on Windows, and maybe more).  I'm not actually sure if that realtek
>>   or the NUCs can do MU-MIMO or not, but the QCA NICs will be able to.  It should be at least somewhat similar
>> to a classroom environment or coffee shop.
>
> In the last 3 coffee shops I went to, I could hear over 30 APs on
> competing SSIDs, running G, N, and AC,
> occupying every available channel.

I especially like when someone uses channel 3 because, I guess, they
think it is un-used :)

I'm not sure if this was a fluke or not, but at Starbucks recently I sat outside,
right next to their window, and could not scan their AP at all.  Previously, I sat
inside, 3 feet away through the glass, and got great signal.  I wonder what that was
all about!  Maybe special tinting that blocks RF?  Or just dumb luck of some sort.

Thanks,
Ben
Dave Taht Nov. 20, 2018, 12:37 a.m. UTC | #15
On Mon, Nov 19, 2018 at 4:20 PM Ben Greear <greearb@candelatech.com> wrote:
>
> On 11/19/2018 04:13 PM, Dave Taht wrote:
> > On Mon, Nov 19, 2018 at 3:56 PM Ben Greear <greearb@candelatech.com> wrote:
> >>
> >> On 11/19/2018 03:47 PM, Dave Taht wrote:
> >>> On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
> >>>>
> >>>> Dave Taht <dave@taht.net> writes:
> >>>>
> >>>> Toke Høiland-Jørgensen <toke@toke.dk> writes:
> >>>>
> >>>> Felix Fietkau <nbd@nbd.name> writes:
> >>>>
> >>>> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
> >>>>
> >>>> This part doesn't really make much sense to me, but maybe I'm
> >>>> misunderstanding how the code works.
> >>>> Let's assume we have a driver like ath9k or mt76, which tries to keep a
> >>>>
> >>>> ….
> >>>>
> >>>>
> >>>> Well, there's going to be a BQL-like queue limit (but for airtime) on
> >>>> top, which drivers can opt-in to if the hardware has too much queueing.
> >>>>
> >>>>
> >>>> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!
> >>>
> >>> Michal faked up a dql estimator 3 (?) years ago. it worked.
> >>>
> >>> http://blog.cerowrt.org/post/dql_on_wifi_2/
> >>>
> >>> As a side note, in *any* real world working mu-mimo situation at any
> >>> scale, on any equipment, does anyone have any stats on how often the
> >>> feature is actually used and useful?
> >>>
> >>> My personal guess, from looking at the standard, was in home
> >>> scenarios, usage would be about... 0, and in a controlled environment
> >>> in a football stadium, quite a lot.
> >>>
> >>> In a office or apartment complex, I figured interference and so forth
> >>> would make it a negative benefit due to retransmits.
> >>>
> >>> I felt when that part of the standard rolled around... that mu-mimo
> >>> was an idea that should never have escaped the lab. I can be convinced
> >>> by data, that we can aim for a higher goal here. But it would be
> >>> comforting to have a measured non-lab, real-world, at real world
> >>> rates, result for it, on some platform, of it actually being useful.
> >>
> >> We're working on building a lab with 20 or 30 mixed 'real' devices
> >> using various different /AC NICs (QCA wave2 on OpenWRT, Fedora, realtek USB 8812au on OpenWRT, Fedora,
> >> and some Intel NICs in NUCs on Windows, and maybe more).  I'm not actually sure if that realtek
> >>   or the NUCs can do MU-MIMO or not, but the QCA NICs will be able to.  It should be at least somewhat similar
> >> to a classroom environment or coffee shop.
> >
> > In the last 3 coffee shops I went to, I could hear over 30 APs on
> > competing SSIDs, running G, N, and AC,
> > occupying every available channel.
>
> I especially like when someone uses channel 3 because, I guess, they
> think it is un-used :)

I think avery actually found a case where that was a benefit, in an
apartment building that had each per-apartment AP
located at exactly the same place on every floor.

I do wish I could go back in time and explain the four colour theorem
to whoever allocated the 2.4ghz wifi band, and then explain even that
was a planar rather than 3D problem.

The 3D coloring problem I visualize as the scene in Aliens 2, where
the monsters just waltz in over the ceiling panels. To me it's an
indelible image kind of superimposed over the interfering waves in the
air, full of gore, goo, and blood...

>
> I'm not sure if this was a fluke or not, but at Starbucks recently I sat outside,
> right next to their window, and could not scan their AP at all.  Previously, I sat
> inside, 3 feet away through the glass, and got great signal.  I wonder what that was
> all about!  Maybe special tinting that blocks RF?  Or just dumb luck of some sort.

Ya know, I could definitely see a market for a material like that! I'd
like it for my car, so bluetooth wouldn't escape.

anyway, just blowing off steam. :) When v4 of this rolls around + BQL?
I can get some results back on it too. One less than perfect option or
the other would be better than continuing to have lousy wifi in all
cases on all platforms.

> Thanks,
> Ben
>
>
> --
> Ben Greear <greearb@candelatech.com>
> Candela Technologies Inc  http://www.candelatech.com
>
Dave Taht Nov. 20, 2018, 1:04 a.m. UTC | #16
On Mon, Nov 19, 2018 at 4:52 PM Simon Barber <simon@superduper.net> wrote:
>
> Low-e glass, it’s a thin metallic film used to reflect infra-red to keep heat in or out. Totally blocks/reflects RF.

Very cool. I imagine it's hell on cell too?

I can see this stuff becoming very popular in places where keeping the
good wifi in is important. Could cover floors and ceilings with it too.
Cars could be tempest rated...

/me goes looking for stock to buy

> Simon
>
> On Nov 19, 2018, at 4:20 PM, Ben Greear <greearb@candelatech.com> wrote:
>
> On 11/19/2018 04:13 PM, Dave Taht wrote:
>
> On Mon, Nov 19, 2018 at 3:56 PM Ben Greear <greearb@candelatech.com> wrote:
>
>
> On 11/19/2018 03:47 PM, Dave Taht wrote:
>
> On Mon, Nov 19, 2018 at 3:30 PM Simon Barber <simon@superduper.net> wrote:
>
>
>
>
> On Nov 19, 2018, at 2:44 PM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
> Dave Taht <dave@taht.net> writes:
>
> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>
> Felix Fietkau <nbd@nbd.name> writes:
>
> On 2018-11-14 18:40, Toke Høiland-Jørgensen wrote:
>
> This part doesn't really make much sense to me, but maybe I'm
> misunderstanding how the code works.
> Let's assume we have a driver like ath9k or mt76, which tries to keep a
>
> ….
>
>
> Well, there's going to be a BQL-like queue limit (but for airtime) on
> top, which drivers can opt-in to if the hardware has too much queueing.
>
>
> Very happy to read this - I first talked to Dave Taht about the need for Time Queue Limits more than 5 years ago!
>
>
> Michal faked up a dql estimator 3 (?) years ago. it worked.
>
> http://blog.cerowrt.org/post/dql_on_wifi_2/
>
> As a side note, in *any* real world working mu-mimo situation at any
> scale, on any equipment, does anyone have any stats on how often the
> feature is actually used and useful?
>
> My personal guess, from looking at the standard, was in home
> scenarios, usage would be about... 0, and in a controlled environment
> in a football stadium, quite a lot.
>
> In a office or apartment complex, I figured interference and so forth
> would make it a negative benefit due to retransmits.
>
> I felt when that part of the standard rolled around... that mu-mimo
> was an idea that should never have escaped the lab. I can be convinced
> by data, that we can aim for a higher goal here. But it would be
> comforting to have a measured non-lab, real-world, at real world
> rates, result for it, on some platform, of it actually being useful.
>
>
> We're working on building a lab with 20 or 30 mixed 'real' devices
> using various different /AC NICs (QCA wave2 on OpenWRT, Fedora, realtek USB 8812au on OpenWRT, Fedora,
> and some Intel NICs in NUCs on Windows, and maybe more).  I'm not actually sure if that realtek
>  or the NUCs can do MU-MIMO or not, but the QCA NICs will be able to.  It should be at least somewhat similar
> to a classroom environment or coffee shop.
>
>
> In the last 3 coffee shops I went to, I could hear over 30 APs on
> competing SSIDs, running G, N, and AC,
> occupying every available channel.
>
>
> I especially like when someone uses channel 3 because, I guess, they
> think it is un-used :)
>
> I'm not sure if this was a fluke or not, but at Starbucks recently I sat outside,
> right next to their window, and could not scan their AP at all.  Previously, I sat
> inside, 3 feet away through the glass, and got great signal.  I wonder what that was
> all about!  Maybe special tinting that blocks RF?  Or just dumb luck of some sort.
>
> Thanks,
> Ben
>
>
> --
> Ben Greear <greearb@candelatech.com>
> Candela Technologies Inc  http://www.candelatech.com
>
>
David Lang Nov. 20, 2018, 2:12 a.m. UTC | #17
On Mon, 19 Nov 2018, Dave Taht wrote:

>> I'm not sure if this was a fluke or not, but at Starbucks recently I sat outside,
>> right next to their window, and could not scan their AP at all.  Previously, I sat
>> inside, 3 feet away through the glass, and got great signal.  I wonder what that was
>> all about!  Maybe special tinting that blocks RF?  Or just dumb luck of some sort.
>
> Ya know, I could definitely see a market for a material like that! I'd
> like it for my car, so bluetooth wouldn't escape.

That would break your tire pressure sensors (each car is rolling around 
broadcasting 4 unique bluetooth IDs, not hard to track)

David Lang
Toke Høiland-Jørgensen Dec. 4, 2018, 2:55 p.m. UTC | #18
Felix Fietkau <nbd@nbd.name> writes:

>> diff --git a/net/mac80211/status.c b/net/mac80211/status.c
>> index aa4afbf0abaf..a1f1256448f5 100644
>> --- a/net/mac80211/status.c
>> +++ b/net/mac80211/status.c
>> @@ -818,6 +818,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
>>  			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
>>  						acked, info->status.tx_time);
>>  
>> +		if (info->status.tx_time &&
>> +		    wiphy_ext_feature_isset(local->hw.wiphy,
>> +					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
>> +			ieee80211_sta_register_airtime(&sta->sta, tid,
>> +						       info->status.tx_time, 0);
>> +
>>  		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
>>  			if (info->flags & IEEE80211_TX_STAT_ACK) {
>>  				if (sta->status_stats.lost_packets)
> I think the same is needed in ieee80211_tx_status_ext.

So finally circled back to this. In ieee80211_tx_status_ext() we don't
have an skb, so we don't know which TID the packet was sent to; what
airtime information would the driver actually provide in this case? Is
it an aggregate of all ACs, or?

-Toke
Johannes Berg Dec. 18, 2018, 12:11 p.m. UTC | #19
On Thu, 2018-11-15 at 09:10 -0800, Toke Høiland-Jørgensen wrote:
> Louie Lu <me@louie.lu> writes:
> 
> > Hi Rajkumar, Toke,
> > 
> > I found the series (v3,4/6) remove the debugfs remove reset station's
> > airtime method, and didn't added at here.
> > 
> > Not sure how to help this kind of situation, do I need a separate
> > patch to fix this, or posting the patch here is fine?
> 
> This is fine; we can fold it into the next version. Thanks :)

Just FYI - I'm going to assume, given this comment and the long
discussion, that there will be a next version :)

johannes
Dave Taht Dec. 18, 2018, 2:08 p.m. UTC | #20
Johannes Berg <johannes@sipsolutions.net> writes:

> On Thu, 2018-11-15 at 09:10 -0800, Toke Høiland-Jørgensen wrote:
>> Louie Lu <me@louie.lu> writes:
>> 
>> > Hi Rajkumar, Toke,
>> > 
>> > I found the series (v3,4/6) remove the debugfs remove reset
>> > station's
>> > airtime method, and didn't added at here.
>> > 
>> > Not sure how to help this kind of situation, do I need a separate
>> > patch to fix this, or posting the patch here is fine?
>> 
>> This is fine; we can fold it into the next version. Thanks :)
>
> Just FYI - I'm going to assume, given this comment and the long
> discussion, that there will be a next version :)

I think toke's still sleeping off the post graduation hangover.

>
> johannes
>
> _______________________________________________
> Make-wifi-fast mailing list
> Make-wifi-fast@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/make-wifi-fast
Toke Høiland-Jørgensen Dec. 18, 2018, 7:19 p.m. UTC | #21
Johannes Berg <johannes@sipsolutions.net> writes:

> On Thu, 2018-11-15 at 09:10 -0800, Toke Høiland-Jørgensen wrote:
>> Louie Lu <me@louie.lu> writes:
>> 
>> > Hi Rajkumar, Toke,
>> > 
>> > I found the series (v3,4/6) remove the debugfs remove reset station's
>> > airtime method, and didn't added at here.
>> > 
>> > Not sure how to help this kind of situation, do I need a separate
>> > patch to fix this, or posting the patch here is fine?
>> 
>> This is fine; we can fold it into the next version. Thanks :)
>
> Just FYI - I'm going to assume, given this comment and the long
> discussion, that there will be a next version :)

Yes. Got caught up in moving before I managed to get another version
out. Will get around to it eventually, promise! :)

-Toke

Patch
diff mbox series

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 18b11c119b7e..c43d615ee9b1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2357,6 +2357,9 @@  enum ieee80211_hw_flags {
  * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from
  *	them are encountered. The default should typically not be changed,
  *	unless the driver has good reasons for needing more buffers.
+ *
+ * @weight_multiplier: Driver specific airtime weight multiplier used while
+ *	refilling deficit of each TXQ.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -2393,6 +2396,7 @@  struct ieee80211_hw {
 	const struct ieee80211_cipher_scheme *cipher_schemes;
 	u8 max_nan_de_entries;
 	u8 tx_sk_pacing_shift;
+	u8 weight_multiplier;
 };
 
 static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
@@ -5393,6 +5397,34 @@  void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid);
 
 /**
+ * ieee80211_sta_register_airtime - register airtime usage for a sta/tid
+ *
+ * Register airtime usage for a given sta on a given tid. The driver can call
+ * this function to notify mac80211 that a station used a certain amount of
+ * airtime. This information will be used by the TXQ scheduler to schedule
+ * stations in a way that ensures airtime fairness.
+ *
+ * The reported airtime should as a minimum include all time that is spent
+ * transmitting to the remote station, including overhead and padding, but not
+ * including time spent waiting for a TXOP. If the time is not reported by the
+ * hardware it can in some cases be calculated from the rate and known frame
+ * composition. When possible, the time should include any failed transmission
+ * attempts.
+ *
+ * The driver can either call this function synchronously for every packet or
+ * aggregate, or asynchronously as airtime usage information becomes available.
+ * TX and RX airtime can be reported together, or separately by setting one of
+ * them to 0.
+ *
+ * @pubsta: the station
+ * @tid: the TID to register airtime for
+ * @tx_airtime: airtime used during TX (in usec)
+ * @rx_airtime: airtime used during RX (in usec)
+ */
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+				    u32 tx_airtime, u32 rx_airtime);
+
+/**
  * ieee80211_iter_keys - iterate keys programmed into the device
  * @hw: pointer obtained from ieee80211_alloc_hw()
  * @vif: virtual interface to iterate, may be %NULL for all
@@ -6150,6 +6182,33 @@  struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac);
 
 /**
+ * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
+ *
+ * This function is used to check whether given txq is allowed to transmit by
+ * the airtime scheduler, and can be used by drivers to access the airtime
+ * fairness accounting without using the scheduling order enforced by
+ * next_txq().
+ *
+ * Returns %true if the airtime scheduler thinks the TXQ should be allowed to
+ * transmit, and %false if it should be throttled. This function can also have
+ * the side effect of rotating the TXQ in the scheduler rotation, which will
+ * eventually bring the deficit to positive and allow the station to transmit
+ * again.
+ *
+ * The API ieee80211_txq_may_transmit() also ensures that the TXQ list will be
+ * aligned against the driver's own round-robin scheduler list, i.e. it rotates
+ * the TXQ list until the requested node becomes the first entry in the
+ * TXQ list. Thus both the TXQ list and driver's list are in sync. If this
+ * function returns %true, the driver is expected to schedule packets
+ * for transmission, and then return the TXQ through ieee80211_return_txq().
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface
+ */
+bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+				struct ieee80211_txq *txq);
+
+/**
  * ieee80211_txq_get_depth - get pending frame/byte count of given txq
  *
  * The values are not guaranteed to be coherent with regard to each other, i.e.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 818aa0060349..57c59e5ceb98 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1447,6 +1447,9 @@  static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
+	if (params->airtime_weight)
+		sta->airtime_weight = params->airtime_weight;
+
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
 	    set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 3fe541e358f3..81c5fec2eae7 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -383,6 +383,9 @@  void debugfs_hw_add(struct ieee80211_local *local)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
+	debugfs_create_u16("airtime_flags", 0600,
+			   phyd, &local->airtime_flags);
+
 	statsd = debugfs_create_dir("statistics", phyd);
 
 	/* if the dir failed, don't put all the other things into the root! */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index af5185a836e5..446908ab3f5d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -181,9 +181,9 @@  static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 			       txqi->tin.tx_bytes,
 			       txqi->tin.tx_packets,
 			       txqi->flags,
-			       txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
-			       txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
-			       txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
+			       test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
+			       test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
+			       test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
 	}
 
 	rcu_read_unlock();
@@ -195,6 +195,46 @@  static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(aqm);
 
+static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->sdata->local;
+	size_t bufsz = 200;
+	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+	u64 rx_airtime = 0, tx_airtime = 0;
+	s64 deficit[IEEE80211_NUM_ACS];
+	ssize_t rv;
+	int ac;
+
+	if (!buf)
+		return -ENOMEM;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		rx_airtime += sta->airtime[ac].rx_airtime;
+		tx_airtime += sta->airtime[ac].tx_airtime;
+		deficit[ac] = sta->airtime[ac].deficit;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	p += scnprintf(p, bufsz + buf - p,
+		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
+		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		rx_airtime,
+		tx_airtime,
+		sta->airtime_weight,
+		deficit[0],
+		deficit[1],
+		deficit[2],
+		deficit[3]);
+
+	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return rv;
+}
+STA_OPS(airtime);
+
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
@@ -906,6 +946,10 @@  void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD(aqm);
 
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		DEBUGFS_ADD(airtime);
+
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
 				   sta->debugfs_dir,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 33763a2eac81..c5b6ba571288 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1137,6 +1137,8 @@  struct ieee80211_local {
 	struct list_head active_txqs[IEEE80211_NUM_ACS];
 	u16 schedule_round[IEEE80211_NUM_ACS];
 
+	u16 airtime_flags;
+
 	const struct ieee80211_ops *ops;
 
 	/*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ba64757afea4..3710727fa9c1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -667,6 +667,7 @@  struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 		INIT_LIST_HEAD(&local->active_txqs[i]);
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
+	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
 
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
@@ -1153,6 +1154,9 @@  int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (!local->hw.max_nan_de_entries)
 		local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID;
 
+	if (!local->hw.weight_multiplier)
+		local->hw.weight_multiplier = 1;
+
 	result = ieee80211_wep_init(local);
 	if (result < 0)
 		wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c2f5cb7df54f..25d47b2af7a9 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -90,7 +90,6 @@  static void __cleanup_single_sta(struct sta_info *sta)
 	struct tid_ampdu_tx *tid_tx;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
-	struct fq *fq = &local->fq;
 	struct ps_data *ps;
 
 	if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -120,9 +119,7 @@  static void __cleanup_single_sta(struct sta_info *sta)
 
 			txqi = to_txq_info(sta->sta.txq[i]);
 
-			spin_lock_bh(&fq->lock);
 			ieee80211_txq_purge(local, txqi);
-			spin_unlock_bh(&fq->lock);
 		}
 	}
 
@@ -387,9 +384,12 @@  struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
+	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
+
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime[i].deficit = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1826,6 +1826,27 @@  void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 }
 EXPORT_SYMBOL(ieee80211_sta_set_buffered);
 
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+				    u32 tx_airtime, u32 rx_airtime)
+{
+	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+	struct ieee80211_local *local = sta->sdata->local;
+	u8 ac = ieee80211_ac_from_tid(tid);
+	u32 airtime = 0;
+
+	if (sta->local->airtime_flags & AIRTIME_USE_TX)
+		airtime += tx_airtime;
+	if (sta->local->airtime_flags & AIRTIME_USE_RX)
+		airtime += rx_airtime;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	sta->airtime[ac].tx_airtime += tx_airtime;
+	sta->airtime[ac].rx_airtime += rx_airtime;
+	sta->airtime[ac].deficit -= airtime;
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_sta_register_airtime);
+
 int sta_info_move_state(struct sta_info *sta,
 			enum ieee80211_sta_state new_state)
 {
@@ -2188,6 +2209,23 @@  void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
 
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->rx_duration += sta->airtime[ac].rx_airtime;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->tx_duration += sta->airtime[ac].tx_airtime;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
+		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->filled |= BIT(NL80211_STA_INFO_AIRTIME_WEIGHT);
+	}
+
 	sinfo->rx_dropped_misc = sta->rx_stats.dropped;
 	if (sta->pcpu_rx_stats) {
 		for_each_possible_cpu(cpu) {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9a04327d71d1..b1b0fd6a2e21 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -127,6 +127,16 @@  enum ieee80211_agg_stop_reason {
 	AGG_STOP_DESTROY_STA,
 };
 
+/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
+#define AIRTIME_USE_TX		BIT(0)
+#define AIRTIME_USE_RX		BIT(1)
+
+struct airtime_info {
+	u64 rx_airtime;
+	u64 tx_airtime;
+	s64 deficit;
+};
+
 struct sta_info;
 
 /**
@@ -563,6 +573,9 @@  struct sta_info {
 	} tx_stats;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
+	struct airtime_info airtime[IEEE80211_NUM_ACS];
+	u16 airtime_weight;
+
 	/*
 	 * Aggregation information, locked with lock.
 	 */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index aa4afbf0abaf..a1f1256448f5 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -818,6 +818,12 @@  static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
 						acked, info->status.tx_time);
 
+		if (info->status.tx_time &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			ieee80211_sta_register_airtime(&sta->sta, tid,
+						       info->status.tx_time, 0);
+
 		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			if (info->flags & IEEE80211_TX_STAT_ACK) {
 				if (sta->status_stats.lost_packets)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 305965283506..3f417e80e041 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1488,8 +1488,11 @@  void ieee80211_txq_purge(struct ieee80211_local *local,
 	struct fq *fq = &local->fq;
 	struct fq_tin *tin = &txqi->tin;
 
+	spin_lock_bh(&fq->lock);
 	fq_tin_reset(fq, tin, fq_skb_free_func);
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
+	spin_unlock_bh(&fq->lock);
+
 	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
 	list_del_init(&txqi->schedule_order);
 	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
@@ -3638,11 +3641,28 @@  struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
+ begin:
 	txqi = list_first_entry_or_null(&local->active_txqs[ac],
 					struct txq_info,
 					schedule_order);
+	if (!txqi)
+		return NULL;
+
+	if (txqi->txq.sta) {
+		struct sta_info *sta = container_of(txqi->txq.sta,
+						struct sta_info, sta);
+
+		if (sta->airtime[txqi->txq.ac].deficit < 0) {
+			sta->airtime[txqi->txq.ac].deficit +=
+				sta->airtime_weight;
+			list_move_tail(&txqi->schedule_order,
+				       &local->active_txqs[txqi->txq.ac]);
+			goto begin;
+		}
+	}
+
 
-	if (!txqi || txqi->schedule_round == local->schedule_round[ac])
+	if (txqi->schedule_round == local->schedule_round[ac])
 		return NULL;
 
 	list_del_init(&txqi->schedule_order);
@@ -3660,12 +3680,74 @@  void ieee80211_return_txq(struct ieee80211_hw *hw,
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
 	if (list_empty(&txqi->schedule_order) &&
-	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets))
-		list_add_tail(&txqi->schedule_order,
-			      &local->active_txqs[txq->ac]);
+	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
+		/* If airtime accounting is active, always enqueue STAs at the
+		 * head of the list to ensure that they only get moved to the
+		 * back by the airtime DRR scheduler once they have a negative
+		 * deficit. A station that already has a negative deficit will
+		 * get immediately moved to the back of the list on the next
+		 * call to ieee80211_next_txq().
+		 */
+		if (txqi->txq.sta &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			list_add(&txqi->schedule_order,
+				 &local->active_txqs[txq->ac]);
+		else
+			list_add_tail(&txqi->schedule_order,
+				      &local->active_txqs[txq->ac]);
+	}
 }
 EXPORT_SYMBOL(ieee80211_return_txq);
 
+bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+				struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+	struct sta_info *sta;
+	u8 ac = txq->ac;
+
+	lockdep_assert_held(&local->active_txq_lock[ac]);
+
+	if (!txqi->txq.sta)
+		goto out;
+
+	if (list_empty(&txqi->schedule_order))
+		goto out;
+
+	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+				 schedule_order) {
+		if (iter == txqi)
+			break;
+
+		if (!iter->txq.sta) {
+			list_move_tail(&iter->schedule_order,
+				       &local->active_txqs[ac]);
+			continue;
+		}
+		sta = container_of(iter->txq.sta, struct sta_info, sta);
+		if (sta->airtime[ac].deficit < 0)
+			sta->airtime[ac].deficit += sta->airtime_weight;
+		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
+	}
+
+	sta = container_of(txqi->txq.sta, struct sta_info, sta);
+	if (sta->airtime[ac].deficit >= 0)
+		goto out;
+
+	sta->airtime[ac].deficit += sta->airtime_weight;
+	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+
+	return false;
+out:
+	if (!list_empty(&txqi->schedule_order))
+		list_del_init(&txqi->schedule_order);
+
+	return true;
+}
+EXPORT_SYMBOL(ieee80211_txq_may_transmit);
+
 void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);