diff mbox

[RFC,1/3] mac80211: Add TXQ scheduling API

Message ID 20171010140208.1515-1-toke@toke.dk (mailing list archive)
State RFC
Delegated to: Johannes Berg
Headers show

Commit Message

Toke Høiland-Jørgensen Oct. 10, 2017, 2:02 p.m. UTC
This adds an API to mac80211 to handle scheduling of TXQs and changes the
interface between driver and mac80211 for TXQ handling as follows:

- The wake_tx_queue callback interface no longer includes the TXQ. Instead, the
  driver is expected to retrieve that from ieee80211_next_txq().

- Two new mac80211 functions are added: ieee80211_next_txq() and
  ieee80211_schedule_txq(). The former returns the next TXQ that should be
  scheduled, and is how the driver gets a queue to pull packets from. The latter
  is called internally by mac80211 to start scheduling a queue, and the driver
  is supposed to call it to re-schedule the TXQ after it is finished pulling
  packets from it (unless the queue emptied).

- A drv_buffered flag is added to struct ieee80211_txq which the driver can set
  to request mac80211 to re-schedule the TXQ at PS wakeup.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
This version of the scheduler is pretty dumb and just does round-robin
scheduling (pretty much identical to what ath10k does today). In the
future, this will be extended to do airtime fairness scheduling for
drivers that provide airtime information.

I have only compile tested this series; will runtime test it later this
week, but for now comments on the API would be appreciated. In
particular, I'm not sure what the right thing to do in regards to PS
wakeup is...

 include/net/mac80211.h     | 34 +++++++++++++++++++++++++++++-----
 net/mac80211/agg-tx.c      |  4 +++-
 net/mac80211/driver-ops.h  | 12 +++---------
 net/mac80211/ieee80211_i.h |  7 ++++++-
 net/mac80211/main.c        |  3 +++
 net/mac80211/sta_info.c    |  3 ++-
 net/mac80211/trace.h       | 21 ++++-----------------
 net/mac80211/tx.c          | 40 +++++++++++++++++++++++++++++++++++++++-
 8 files changed, 89 insertions(+), 35 deletions(-)

Comments

Johannes Berg Oct. 10, 2017, 3:53 p.m. UTC | #1
On Tue, 2017-10-10 at 16:02 +0200, Toke Høiland-Jørgensen wrote:

> +++ b/net/mac80211/agg-tx.c
> @@ -226,9 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta,
> int tid, bool enable)
>  		clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
>  
>  	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
> +	ieee80211_schedule_txq(&sta->sdata->local->hw, txq);
> +
>  	local_bh_disable();
>  	rcu_read_lock();
> -	drv_wake_tx_queue(sta->sdata->local, txqi);
> +	drv_wake_tx_queue(sta->sdata->local);
>  	rcu_read_unlock();
>  	local_bh_enable();

It seems like there could be some sort of TX batching here - maybe only
call the driver if the queue was actually scheduled?

Return true/false from ieee80211_schedule_txq() depending on whether it
was added or not, and then call the driver only if it was added just
now? That way, we can save a bunch of driver calls, batching the TX.
 
> @@ -1121,6 +1122,9 @@ struct ieee80211_local {
>  	struct codel_vars *cvars;
>  	struct codel_params cparams;
>  
> +	struct list_head active_txqs;
> +	spinlock_t active_txq_lock;

Is there much point in having a separate lock? We probably need the fq
lock in most places related to this anyway?

> +++ b/net/mac80211/sta_info.c
> @@ -1250,8 +1250,9 @@ void ieee80211_sta_ps_deliver_wakeup(struct
> sta_info *sta)
>  			if (!txq_has_queue(sta->sta.txq[i]))
>  				continue;
>  
> -			drv_wake_tx_queue(local, to_txq_info(sta-
> >sta.txq[i]));
> +			ieee80211_schedule_txq(&local->hw, sta-
> >sta.txq[i]);
>  		}
> +		drv_wake_tx_queue(local);

Again, calling the driver could be conditional on having done any
interesting work.

> @@ -1524,7 +1526,8 @@ static bool ieee80211_queue_skb(struct
> ieee80211_local *local,
>  	ieee80211_txq_enqueue(local, txqi, skb);
>  	spin_unlock_bh(&fq->lock);
>  
> -	drv_wake_tx_queue(local, txqi);
> +	ieee80211_schedule_txq(&local->hw, &txqi->txq);
> +	drv_wake_tx_queue(local);

ditto

> +void ieee80211_schedule_txq(struct ieee80211_hw *hw,
> +			     struct ieee80211_txq *txq)
> +{
> +	struct ieee80211_local *local = hw_to_local(hw);
> +	struct txq_info *txqi = to_txq_info(txq);
> +
> +	spin_lock_bh(&local->active_txq_lock);
> +
> +	if (!list_empty(&txqi->schedule_order))
> +		list_add_tail(&txqi->schedule_order, &local-
> >active_txqs);

What's with the !list_empty()? Seems inverted to me? You need to add it
if it's empty?

Also maybe you should only do that if the TXQ isn't *empty*, so the
driver could call this unconditionally?

> +struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw)
> +{
> +	struct ieee80211_local *local = hw_to_local(hw);
> +	struct txq_info *txqi = NULL;
> +
> +	spin_lock_bh(&local->active_txq_lock);
> +
> +	if (list_empty(&local->active_txqs))
> +		goto out;
> +
> +	txqi = list_first_entry(&local->active_txqs, struct
> txq_info, schedule_order);
> +	list_del_init(&txqi->schedule_order);
> +
> +out:
> +	spin_unlock_bh(&local->active_txq_lock);
> +
> +	return &txqi->txq;

You forgot
	if (!txqi)
		return NULL;

johannes
Johannes Berg Oct. 10, 2017, 4:05 p.m. UTC | #2
> In particular, I'm not sure what the right thing to do in regards to
> PS wakeup is...

Can you explain what you were _trying_ to do?

I don't like calling this "driver_buffered" because that's already a
term for frames that are buffered in the driver ... :-)

PS is complicated, we basically transmit, in the following order:
 * filtered frames (tx_filtered)
 * buffered frames (ps_tx_buf)
 * regular frames

This is when we _leave_ powersave. When we deliver frames while the
station is sleeping (PS-Poll or U-APSD), they don't even go through the
TXQ. They still come from this place, but currently go directly to the
->tx() method, which to me is actually pretty weird but that's what it
is now.

As I think I said in my other thread, we should probably eventually
just get rid of ps_tx_buf entirely, instead just keeping the frames on
the TXQ. Then, filtered can just be pushed onto txqi->frags [*], and we
get rid of having that separately as well.

Then, we've completely solved the wakeup scenario, we just start
scheduling that TXQ normally again.

For the deliver-while-sleeping (PS-Poll/U-APSD) scenario, I think the
driver should still pull frames, after calling something like
drv_release_buffered_frames(). We want this to be scheduled pretty much
immediately, so we shouldn't just put the TXQ into the normal rotation,
but otherwise it should work similarly - except limited to a certain
number of frames [**].
In this case the driver probably needs to pull the frames using a
different function so that we can
 a) tag the packets properly (more-data, EOSP)
 b) generate nulldata as EOSP container where needed
Thus, it seems likely that we'll want a separate function, "pull for PS
delivery" rather than the normal ieee80211_tx_dequeue().

johannes

[*] ok not exactly, if there are frags there already things get messy.
    but we can probably solve that somehow without needing more special
    cases
Toke Høiland-Jørgensen Oct. 10, 2017, 5:51 p.m. UTC | #3
Johannes Berg <johannes@sipsolutions.net> writes:

> On Tue, 2017-10-10 at 16:02 +0200, Toke Høiland-Jørgensen wrote:
>
>> +++ b/net/mac80211/agg-tx.c
>> @@ -226,9 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta,
>> int tid, bool enable)
>>  		clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
>>  
>>  	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
>> +	ieee80211_schedule_txq(&sta->sdata->local->hw, txq);
>> +
>>  	local_bh_disable();
>>  	rcu_read_lock();
>> -	drv_wake_tx_queue(sta->sdata->local, txqi);
>> +	drv_wake_tx_queue(sta->sdata->local);
>>  	rcu_read_unlock();
>>  	local_bh_enable();
>
> It seems like there could be some sort of TX batching here - maybe
> only call the driver if the queue was actually scheduled?

Yeah, I guess that should be doable.

>> @@ -1121,6 +1122,9 @@ struct ieee80211_local {
>>  	struct codel_vars *cvars;
>>  	struct codel_params cparams;
>>  
>> +	struct list_head active_txqs;
>> +	spinlock_t active_txq_lock;
>
> Is there much point in having a separate lock? We probably need the fq
> lock in most places related to this anyway?

Well, once the scheduler gets a bit smarter it may be necessary to muck
with the order of TXQs on there without touching any of the queues
(e.g., when calculating airtime usage on TX and RX completion). Not sure
if that is enough to warrant a separate lock, though; I hadn't thought
about just grabbing fq->lock...

>> +void ieee80211_schedule_txq(struct ieee80211_hw *hw,
>> +			     struct ieee80211_txq *txq)
>> +{
>> +	struct ieee80211_local *local = hw_to_local(hw);
>> +	struct txq_info *txqi = to_txq_info(txq);
>> +
>> +	spin_lock_bh(&local->active_txq_lock);
>> +
>> +	if (!list_empty(&txqi->schedule_order))
>> +		list_add_tail(&txqi->schedule_order, &local-
>> >active_txqs);
>
> What's with the !list_empty()? Seems inverted to me? You need to add
> it if it's empty?

Yeah, nice catch. This started out as an "unschedule" function before I
decided this way was better. Guess I forgot to switch the logic :)

> Also maybe you should only do that if the TXQ isn't *empty*, so the
> driver could call this unconditionally?

There can be cases where the driver wants the queue to be scheduled even
though it looks empty from mac80211's point of view. For ath9k, the
driver keeps its retry queue in the drv_priv part of the txq structure,
so it will check if that is empty before deciding to call the schedule
function.

This is also related to the PS behaviour, so guess this could be changed
once that is all TXQ-based...

-Toke
Toke Høiland-Jørgensen Oct. 10, 2017, 6:04 p.m. UTC | #4
Johannes Berg <johannes@sipsolutions.net> writes:

>> In particular, I'm not sure what the right thing to do in regards to
>> PS wakeup is...
>
> Can you explain what you were _trying_ to do?
>
> I don't like calling this "driver_buffered" because that's already a
> term for frames that are buffered in the driver ... :-)

Well I was trying to do The Right Thing(tm), obviously ;)

The driver_buffered field comes from the previous behaviour of ath9k: It
would basically set the station_buffered flag if there was something in
the retry queue at the time it goes to sleep. And on wakeup, it will
reschedule the txq if it has anything in the retry queue. And, well, it
just seemed odd that there was this duplicated logic in the driver and
mac80211, if the driver could just signal to mac80211 to reschedule for
it...

But I guess I was really just getting ahead of myself there; so the
right thing to do for now is to keep the old behaviour, and then fix it
properly afterwards?

> PS is complicated, we basically transmit, in the following order:
>  * filtered frames (tx_filtered)
>  * buffered frames (ps_tx_buf)
>  * regular frames
>
> This is when we _leave_ powersave. When we deliver frames while the
> station is sleeping (PS-Poll or U-APSD), they don't even go through the
> TXQ. They still come from this place, but currently go directly to the
> ->tx() method, which to me is actually pretty weird but that's what it
> is now.
>
> As I think I said in my other thread, we should probably eventually
> just get rid of ps_tx_buf entirely, instead just keeping the frames on
> the TXQ. Then, filtered can just be pushed onto txqi->frags [*], and we
> get rid of having that separately as well.
>
> Then, we've completely solved the wakeup scenario, we just start
> scheduling that TXQ normally again.

Yes, this makes sense. Each sleep period is pretty short, right? I.e.,
we don't need to deal with interactions between CoDel and the queue
being stopped for a long period of time?

> For the deliver-while-sleeping (PS-Poll/U-APSD) scenario, I think the
> driver should still pull frames, after calling something like
> drv_release_buffered_frames(). We want this to be scheduled pretty much
> immediately, so we shouldn't just put the TXQ into the normal rotation,
> but otherwise it should work similarly - except limited to a certain
> number of frames [**].
> In this case the driver probably needs to pull the frames using a
> different function so that we can
>  a) tag the packets properly (more-data, EOSP)
>  b) generate nulldata as EOSP container where needed
> Thus, it seems likely that we'll want a separate function, "pull for PS
> delivery" rather than the normal ieee80211_tx_dequeue().

I was envisioning that next_txq() could also make those kinds of
decisions (i.e., preempt the normal scheduling algorithm when a
"special" TXQ needs to be scheduled immediately). But not sure if that
is enough for this case?

-Toke
Johannes Berg Oct. 11, 2017, 8:44 a.m. UTC | #5
> Well I was trying to do The Right Thing(tm), obviously ;)

:-)

> The driver_buffered field comes from the previous behaviour of ath9k:
> It would basically set the station_buffered flag if there was
> something in the retry queue at the time it goes to sleep. And on
> wakeup, it will reschedule the txq if it has anything in the retry
> queue. And, well, it just seemed odd that there was this duplicated
> logic in the driver and mac80211, if the driver could just signal to
> mac80211 to reschedule for it...

Ok. I'm not sure what ath9k does, but mac80211 makes a distinction
between "do I have something buffered" and "does the driver have
something buffered".

When the station wakes up it's pretty easy, we tell the driver and it
just has to send its own frames first.

But when we get a PS-Poll/U-APSD it's more complicated, and we have to
tell the driver "please release N frames you have buffered" or we may
have to tell it "please allow me to send N frames that I have buffered"
which is why we need the distinction of whether or not the driver has
something buffered.

Ultimately, with TXQs, I hope this distinction can go away because the
driver wouldn't buffer all by itself.

> But I guess I was really just getting ahead of myself there; so the
> right thing to do for now is to keep the old behaviour, and then fix
> it properly afterwards?

I guess that'd be easier.

> Yes, this makes sense. Each sleep period is pretty short, right? 

Not necessarily.

> I.e., we don't need to deal with interactions between CoDel and the
> queue being stopped for a long period of time?

I don't know enough about the possible interactions to answer that.
Sleep periods may be long, though typically if there's traffic for
stations then they want to retrieve that and will wake up relatively
soon, but "relatively soon" may still be on the order of hundreds of
milliseconds (or even seconds) because sometimes clients will only
listen to DTIM beacons (DTIM period * beacon interval), and e.g. with
WNM sleep mode it becomes even longer.

> > For the deliver-while-sleeping (PS-Poll/U-APSD) scenario, I think
> > the
> > driver should still pull frames, after calling something like
> > drv_release_buffered_frames(). We want this to be scheduled pretty
> > much
> > immediately, so we shouldn't just put the TXQ into the normal
> > rotation,
> > but otherwise it should work similarly - except limited to a
> > certain
> > number of frames [**].
> > In this case the driver probably needs to pull the frames using a
> > different function so that we can
> >  a) tag the packets properly (more-data, EOSP)
> >  b) generate nulldata as EOSP container where needed
> > Thus, it seems likely that we'll want a separate function, "pull
> > for PS
> > delivery" rather than the normal ieee80211_tx_dequeue().
> 
> I was envisioning that next_txq() could also make those kinds of
> decisions (i.e., preempt the normal scheduling algorithm when a
> "special" TXQ needs to be scheduled immediately). But not sure if
> that is enough for this case?

Hmm, not sure. We really want this to be scheduled pretty much
immediately because the other side will be waiting for the frames, and
if we don't get an answer out quickly it'll have to assume we're
broken. I don't know what the limit here is for our firmware, but we
should really get this out as soon as possible in this case.

johannes
Johannes Berg Oct. 11, 2017, 8:46 a.m. UTC | #6
On Tue, 2017-10-10 at 19:51 +0200, Toke Høiland-Jørgensen wrote:

> > > +	struct list_head active_txqs;
> > > +	spinlock_t active_txq_lock;
> > 
> > Is there much point in having a separate lock? We probably need the
> > fq lock in most places related to this anyway?
> 
> Well, once the scheduler gets a bit smarter it may be necessary to
> muck with the order of TXQs on there without touching any of the
> queues (e.g., when calculating airtime usage on TX and RX
> completion). Not sure if that is enough to warrant a separate lock,
> though; I hadn't thought about just grabbing fq->lock...

Ok, dunno. It seemed sort of related but perhaps it's not really.


> > Also maybe you should only do that if the TXQ isn't *empty*, so the
> > driver could call this unconditionally?
> 
> There can be cases where the driver wants the queue to be scheduled
> even though it looks empty from mac80211's point of view. For ath9k,
> the driver keeps its retry queue in the drv_priv part of the txq
> structure, so it will check if that is empty before deciding to call
> the schedule function.
> 
> This is also related to the PS behaviour, so guess this could be
> changed once that is all TXQ-based...

Interesting. I guess scheduling an empty queue doesn't really matter
for mac80211 anyway though - just some extra work if we try to get
frames from it.

johannes
Toke Høiland-Jørgensen Oct. 11, 2017, 1:54 p.m. UTC | #7
Johannes Berg <johannes@sipsolutions.net> writes:

> On Tue, 2017-10-10 at 19:51 +0200, Toke Høiland-Jørgensen wrote:
>
>> > > +	struct list_head active_txqs;
>> > > +	spinlock_t active_txq_lock;
>> > 
>> > Is there much point in having a separate lock? We probably need the
>> > fq lock in most places related to this anyway?
>> 
>> Well, once the scheduler gets a bit smarter it may be necessary to
>> muck with the order of TXQs on there without touching any of the
>> queues (e.g., when calculating airtime usage on TX and RX
>> completion). Not sure if that is enough to warrant a separate lock,
>> though; I hadn't thought about just grabbing fq->lock...
>
> Ok, dunno. It seemed sort of related but perhaps it's not really.

Yeah. I'll keep this in mind when I implement the airtime fairness
accounting.

>> > Also maybe you should only do that if the TXQ isn't *empty*, so the
>> > driver could call this unconditionally?
>> 
>> There can be cases where the driver wants the queue to be scheduled
>> even though it looks empty from mac80211's point of view. For ath9k,
>> the driver keeps its retry queue in the drv_priv part of the txq
>> structure, so it will check if that is empty before deciding to call
>> the schedule function.
>> 
>> This is also related to the PS behaviour, so guess this could be
>> changed once that is all TXQ-based...
>
> Interesting. I guess scheduling an empty queue doesn't really matter
> for mac80211 anyway though - just some extra work if we try to get
> frames from it.

Yes, and I figure giving the driver that option might be a good way to
have some flexibility.

-Toke
Toke Høiland-Jørgensen Oct. 11, 2017, 2:06 p.m. UTC | #8
Johannes Berg <johannes@sipsolutions.net> writes:

>> Well I was trying to do The Right Thing(tm), obviously ;)
>
> :-)
>
>> The driver_buffered field comes from the previous behaviour of ath9k:
>> It would basically set the station_buffered flag if there was
>> something in the retry queue at the time it goes to sleep. And on
>> wakeup, it will reschedule the txq if it has anything in the retry
>> queue. And, well, it just seemed odd that there was this duplicated
>> logic in the driver and mac80211, if the driver could just signal to
>> mac80211 to reschedule for it...
>
> Ok. I'm not sure what ath9k does, but mac80211 makes a distinction
> between "do I have something buffered" and "does the driver have
> something buffered".
>
> When the station wakes up it's pretty easy, we tell the driver and it
> just has to send its own frames first.
>
> But when we get a PS-Poll/U-APSD it's more complicated, and we have to
> tell the driver "please release N frames you have buffered" or we may
> have to tell it "please allow me to send N frames that I have buffered"
> which is why we need the distinction of whether or not the driver has
> something buffered.
>
> Ultimately, with TXQs, I hope this distinction can go away because the
> driver wouldn't buffer all by itself.

Yes, that would be good. Okay, I'll keep PS buffering the way it works
now.

>> But I guess I was really just getting ahead of myself there; so the
>> right thing to do for now is to keep the old behaviour, and then fix
>> it properly afterwards?
>
> I guess that'd be easier.
>
>> Yes, this makes sense. Each sleep period is pretty short, right? 
>
> Not necessarily.
>
>> I.e., we don't need to deal with interactions between CoDel and the
>> queue being stopped for a long period of time?
>
> I don't know enough about the possible interactions to answer that.
> Sleep periods may be long, though typically if there's traffic for
> stations then they want to retrieve that and will wake up relatively
> soon, but "relatively soon" may still be on the order of hundreds of
> milliseconds (or even seconds) because sometimes clients will only
> listen to DTIM beacons (DTIM period * beacon interval), and e.g. with
> WNM sleep mode it becomes even longer.

Ah. Right. Well, guess we'll have to cross that bridge once we get
there. Not sure what the right thing to do is from the point of view of
an AQM in this instance. But at least making sure it doesn't drop the
packet at the head of every queue at wakeup would probably be good.

>> I was envisioning that next_txq() could also make those kinds of
>> decisions (i.e., preempt the normal scheduling algorithm when a
>> "special" TXQ needs to be scheduled immediately). But not sure if
>> that is enough for this case?
>
> Hmm, not sure. We really want this to be scheduled pretty much
> immediately because the other side will be waiting for the frames, and
> if we don't get an answer out quickly it'll have to assume we're
> broken. I don't know what the limit here is for our firmware, but we
> should really get this out as soon as possible in this case.

OK. But presumably it can't preempt packets already pushed to the
hardware, right? So telling the driver to immediately schedule a packet,
and making sure that the txq it gets from next_txq() is the right one
should do the trick? But I guess it's a bit of a roundabout way, which
may not be worth it to avoid an extra callback...

-Toke
Johannes Berg Oct. 11, 2017, 2:15 p.m. UTC | #9
On Wed, 2017-10-11 at 16:06 +0200, Toke Høiland-Jørgensen wrote:

> > Hmm, not sure. We really want this to be scheduled pretty much
> > immediately because the other side will be waiting for the frames,
> > and
> > if we don't get an answer out quickly it'll have to assume we're
> > broken. I don't know what the limit here is for our firmware, but
> > we
> > should really get this out as soon as possible in this case.
> 
> OK. But presumably it can't preempt packets already pushed to the
> hardware, right? 

True. If there are still packets scheduled then it needs even more
driver tricks to drop those back to tx_pending first ...

> So telling the driver to immediately schedule a packet,
> and making sure that the txq it gets from next_txq() is the right one
> should do the trick? But I guess it's a bit of a roundabout way,
> which may not be worth it to avoid an extra callback...

Yeah, might work, but remember that we need to mangle the packet, and
for that we need to know how many packets will go out. E.g. with
U-APSD, if we tell the driver 8 packets are OK, and it only wants 6,
then that's acceptable, but we have to tag the last of those with EOSP ...

So one way or another I think we need a separate callback here, and
perhaps just have the driver do the EOSP tagging, or have a separate
function to pull the frames so mac80211 can do the tagging, dunno.

Note that this is only for _some_ drivers, others will implement much
of this in firmware.

johannes
Toke Høiland-Jørgensen Oct. 11, 2017, 2:29 p.m. UTC | #10
Johannes Berg <johannes@sipsolutions.net> writes:

> On Wed, 2017-10-11 at 16:06 +0200, Toke Høiland-Jørgensen wrote:
>
>> > Hmm, not sure. We really want this to be scheduled pretty much
>> > immediately because the other side will be waiting for the frames,
>> > and
>> > if we don't get an answer out quickly it'll have to assume we're
>> > broken. I don't know what the limit here is for our firmware, but
>> > we
>> > should really get this out as soon as possible in this case.
>> 
>> OK. But presumably it can't preempt packets already pushed to the
>> hardware, right? 
>
> True. If there are still packets scheduled then it needs even more
> driver tricks to drop those back to tx_pending first ...

Only for packets to the same station, right?

>> So telling the driver to immediately schedule a packet,
>> and making sure that the txq it gets from next_txq() is the right one
>> should do the trick? But I guess it's a bit of a roundabout way,
>> which may not be worth it to avoid an extra callback...
>
> Yeah, might work, but remember that we need to mangle the packet, and
> for that we need to know how many packets will go out. E.g. with U-
> APSD, if we tell the driver 8 packets are OK, and it only wants 6, then
> that's acceptable, but we have to tag the last of those with EOSP ...
>
> So one way or another I think we need a separate callback here, and
> perhaps just have the driver do the EOSP tagging, or have a separate
> function to pull the frames so mac80211 can do the tagging, dunno.

Yeah, sounds like it'll need a separate callback, or at least a flag.

What part of the standard do I have to read to learn how this is
supposed to work, BTW? Or even better, is there a resource that
describes how PS works that is more accessible than the standard itself?

> Note that this is only for _some_ drivers, others will implement much
> of this in firmware.

Right, of course. Fun times! :)

-Toke
Johannes Berg Oct. 11, 2017, 2:34 p.m. UTC | #11
On Wed, 2017-10-11 at 16:29 +0200, Toke Høiland-Jørgensen wrote:
> Johannes Berg <johannes@sipsolutions.net> writes:
> 
> > On Wed, 2017-10-11 at 16:06 +0200, Toke Høiland-Jørgensen wrote:
> > 
> > > > Hmm, not sure. We really want this to be scheduled pretty much
> > > > immediately because the other side will be waiting for the
> > > > frames,
> > > > and
> > > > if we don't get an answer out quickly it'll have to assume
> > > > we're
> > > > broken. I don't know what the limit here is for our firmware,
> > > > but
> > > > we
> > > > should really get this out as soon as possible in this case.
> > > 
> > > OK. But presumably it can't preempt packets already pushed to the
> > > hardware, right? 
> > 
> > True. If there are still packets scheduled then it needs even more
> > driver tricks to drop those back to tx_pending first ...
> 
> Only for packets to the same station, right?

Yes, but they need to be somehow taken off the queue to avoid
reordering. Usually the firmware will filter them, but synchronisation
is difficult, since what might happen is that some frames are already
filtered, others aren't, and then the later ones actually need to go
out while the earlier ones don't ... if that's not done, you get
reordering and the state machines get out of sync.

> What part of the standard do I have to read to learn how this is
> supposed to work, BTW? Or even better, is there a resource that
> describes how PS works that is more accessible than the standard
> itself?

Hmm. I don't think the standard will actually help you much here, it
just describes the over-the-air behaviour.

Most of the complexity here comes from having to deal with driver
buffering, mac80211 buffering, addressing the reordering problem
(described above) from queuing frames for multiple stations on the same
hw queue.

johannes
diff mbox

Patch

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index cc9073e45be9..80a80efa06b1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -105,9 +105,12 @@ 
  * The driver is expected to initialize its private per-queue data for stations
  * and interfaces in the .add_interface and .sta_add ops.
  *
- * The driver can't access the queue directly. To dequeue a frame, it calls
- * ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a queue, it
- * calls the .wake_tx_queue driver op.
+ * The driver can't access the queue directly. To obtain the next queue to pull
+ * frames from, the driver calls ieee80211_next_txq(). To dequeue a frame from a
+ * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a
+ * queue, it calls the .wake_tx_queue driver op. The driver is expected to
+ * re-schedule the txq using ieee80211_schedule_txq() if it is still active
+ * after the driver has finished pulling packets from it.
  *
  * For AP powersave TIM handling, the driver only needs to indicate if it has
  * buffered packets in the driver specific data structures by calling
@@ -1870,6 +1873,7 @@  struct ieee80211_txq {
 	u8 tid;
 	u8 ac;
 
+	bool drv_buffered;
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
@@ -3723,8 +3727,7 @@  struct ieee80211_ops {
 					 struct ieee80211_vif *vif,
 					 struct ieee80211_tdls_ch_sw_params *params);
 
-	void (*wake_tx_queue)(struct ieee80211_hw *hw,
-			      struct ieee80211_txq *txq);
+	void (*wake_tx_queue)(struct ieee80211_hw *hw);
 	void (*sync_rx_queues)(struct ieee80211_hw *hw);
 
 	int (*start_nan)(struct ieee80211_hw *hw,
@@ -5876,6 +5879,27 @@  void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 				     struct ieee80211_txq *txq);
 
+/**
+ * ieee80211_schedule_txq - add txq to scheduling loop
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface
+ *
+ */
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq);
+
+/**
+ * ieee80211_next_txq - get next tx queue to pull packets from
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ *
+ * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
+ * is returned, it will have been removed from the scheduler queue and needs to
+ * be re-scheduled with ieee80211_schedule_txq() to continue to be active.
+ */
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw);
+
 /**
  * ieee80211_txq_get_depth - get pending frame/byte count of given txq
  *
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index bef516ec47f9..77bd0ff90677 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -226,9 +226,11 @@  ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
 		clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
 
 	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+	ieee80211_schedule_txq(&sta->sdata->local->hw, txq);
+
 	local_bh_disable();
 	rcu_read_lock();
-	drv_wake_tx_queue(sta->sdata->local, txqi);
+	drv_wake_tx_queue(sta->sdata->local);
 	rcu_read_unlock();
 	local_bh_enable();
 }
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 09f77e4a8a79..e20a9e2acd53 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1157,16 +1157,10 @@  drv_tdls_recv_channel_switch(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
-static inline void drv_wake_tx_queue(struct ieee80211_local *local,
-				     struct txq_info *txq)
+static inline void drv_wake_tx_queue(struct ieee80211_local *local)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
-
-	if (!check_sdata_in_driver(sdata))
-		return;
-
-	trace_drv_wake_tx_queue(local, sdata, txq);
-	local->ops->wake_tx_queue(&local->hw, &txq->txq);
+	trace_drv_wake_tx_queue(local);
+	local->ops->wake_tx_queue(&local->hw);
 }
 
 static inline int drv_start_nan(struct ieee80211_local *local,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9675814f64db..521bedb5dc86 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -832,6 +832,7 @@  struct txq_info {
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
+	struct list_head schedule_order;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1121,6 +1122,9 @@  struct ieee80211_local {
 	struct codel_vars *cvars;
 	struct codel_params cparams;
 
+	struct list_head active_txqs;
+	spinlock_t active_txq_lock;
+
 	const struct ieee80211_ops *ops;
 
 	/*
@@ -1505,7 +1509,8 @@  static inline bool txq_has_queue(struct ieee80211_txq *txq)
 {
 	struct txq_info *txqi = to_txq_info(txq);
 
-	return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
+	return !skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets ||
+	       txq->drv_buffered;
 }
 
 static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8aa1f5b6a051..9ad0556aa24b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -616,6 +616,9 @@  struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	spin_lock_init(&local->rx_path_lock);
 	spin_lock_init(&local->queue_stop_reason_lock);
 
+	INIT_LIST_HEAD(&local->active_txqs);
+	spin_lock_init(&local->active_txq_lock);
+
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9673e157bf8f..bd31e6dc808f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1250,8 +1250,9 @@  void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 			if (!txq_has_queue(sta->sta.txq[i]))
 				continue;
 
-			drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
+			ieee80211_schedule_txq(&local->hw, sta->sta.txq[i]);
 		}
+		drv_wake_tx_queue(local);
 	}
 
 	skb_queue_head_init(&pending);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d9ac17af407..531c0e7f2358 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2550,33 +2550,20 @@  TRACE_EVENT(drv_tdls_recv_channel_switch,
 );
 
 TRACE_EVENT(drv_wake_tx_queue,
-	TP_PROTO(struct ieee80211_local *local,
-		 struct ieee80211_sub_if_data *sdata,
-		 struct txq_info *txq),
+	TP_PROTO(struct ieee80211_local *local),
 
-	TP_ARGS(local, sdata, txq),
+	TP_ARGS(local),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
-		VIF_ENTRY
-		STA_ENTRY
-		__field(u8, ac)
-		__field(u8, tid)
 	),
 
 	TP_fast_assign(
-		struct ieee80211_sta *sta = txq->txq.sta;
-
 		LOCAL_ASSIGN;
-		VIF_ASSIGN;
-		STA_ASSIGN;
-		__entry->ac = txq->txq.ac;
-		__entry->tid = txq->txq.tid;
 	),
-
 	TP_printk(
-		LOCAL_PR_FMT  VIF_PR_FMT  STA_PR_FMT " ac:%d tid:%d",
-		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid
+		LOCAL_PR_FMT,
+		LOCAL_PR_ARG
 	)
 );
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94826680cf2b..9c43c17c5144 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1405,6 +1405,7 @@  void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
+	INIT_LIST_HEAD(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1428,6 +1429,10 @@  void ieee80211_txq_purge(struct ieee80211_local *local,
 
 	fq_tin_reset(fq, tin, fq_skb_free_func);
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
+	/* Unlink under the lock that guards local->active_txqs. */
+	spin_lock_bh(&local->active_txq_lock);
+	list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock);
 }
 
 int ieee80211_txq_setup_flows(struct ieee80211_local *local)
@@ -1524,7 +1526,8 @@  static bool ieee80211_queue_skb(struct ieee80211_local *local,
 	ieee80211_txq_enqueue(local, txqi, skb);
 	spin_unlock_bh(&fq->lock);
 
-	drv_wake_tx_queue(local, txqi);
+	ieee80211_schedule_txq(&local->hw, &txqi->txq);
+	drv_wake_tx_queue(local);
 
 	return true;
 }
@@ -3517,6 +3520,41 @@  struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_tx_dequeue);
 
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			     struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	spin_lock_bh(&local->active_txq_lock);
+
+	/* Empty node == not on active_txqs; never link a queued node twice. */
+	if (list_empty(&txqi->schedule_order))
+		list_add_tail(&txqi->schedule_order, &local->active_txqs);
+	spin_unlock_bh(&local->active_txq_lock);
+}
+EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi;
+
+	spin_lock_bh(&local->active_txq_lock);
+
+	/* Pop the head of the scheduling list, if there is one. */
+	txqi = list_first_entry_or_null(&local->active_txqs, struct txq_info,
+					schedule_order);
+	if (txqi)
+		list_del_init(&txqi->schedule_order);
+
+	spin_unlock_bh(&local->active_txq_lock);
+
+	/* Nothing scheduled: hand back NULL rather than &NULL->txq. */
+	return txqi ? &txqi->txq : NULL;
+}
+EXPORT_SYMBOL(ieee80211_next_txq);
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags)