diff mbox

mac80211: prevent skb/txq mismatch

Message ID 1484231321-3179-1-git-send-email-michal.kazior@tieto.com (mailing list archive)
State Changes Requested
Delegated to: Johannes Berg
Headers show

Commit Message

Michal Kazior Jan. 12, 2017, 2:28 p.m. UTC
A station structure is considered not uploaded
(to the driver) until drv_sta_state() finishes.
This call is, however, made after the structure is
attached to mac80211 internal lists and hashes.
This means mac80211 can look up (and use) a station
structure before it is uploaded to the driver.

If this happens (the structure exists, but
sta->uploaded is false) the fast_tx path can still
be taken. Deep in the fastpath call, sta->uploaded
is checked to derive the "pubsta" argument for
ieee80211_get_txq(). If sta->uploaded is false
(and sta is actually non-NULL), ieee80211_get_txq()
effectively downgrades to vif->txq.

At first glance this may look innocent, but it
coerces mac80211 into a state that is almost
guaranteed to crash (codel may drop the offending
skb) because a station-oriented skb gets queued
up on a vif-oriented txq. ieee80211_tx_dequeue()
ends up looking at info->control.flags and tries to
use txq->sta, which in the failure case is NULL.

It's probably pointless to pretend one can
downgrade skb from sta-txq to vif-txq.

Only drivers using wake_tx_queue were affected.

Example crash dump before fix:

 Unable to handle kernel paging request at virtual address ffffe26c
 PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211]
 [<bf4252a4>] (ieee80211_tx_dequeue [mac80211]) from
 [<bf4b1388>] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core])
 [<bf4b1388>] (ath10k_mac_tx_push_txq [ath10k_core]) from
 [<bf4bdfbc>] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core])
 [<bf4bdfbc>] (ath10k_htt_txrx_compl_task [ath10k_core])
 [<bf51c5a4>] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci])
 [<bf51c5a4>] (ath10k_pci_napi_poll [ath10k_pci]) from
 [<c0572e90>] (net_rx_action+0xac/0x160)

Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
---
 net/mac80211/tx.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

Comments

Mohammed Shafi Shajakhan Jan. 12, 2017, 2:45 p.m. UTC | #1
On Thu, Jan 12, 2017 at 03:28:41PM +0100, Michal Kazior wrote:
> Station structure is considered as not uploaded
> (to driver) until drv_sta_state() finishes. This
> call is however done after the structure is
> attached to mac80211 internal lists and hashes.
> This means mac80211 can lookup (and use) station
> structure before it is uploaded to a driver.
> 
> If this happens (structure exists, but
> sta->uploaded is false) fast_tx path can still be
> taken. Deep in the fastpath call the sta->uploaded
> is checked against to derive "pubsta" argument for
> ieee80211_get_txq(). If sta->uploaded is false
> (and sta is actually non-NULL) ieee80211_get_txq()
> effectively downgraded to vif->txq.
> 
> At first glance this may look innocent but coerces
> mac80211 into a state that is almost guaranteed
> (codel may drop offending skb) to crash because a
> station-oriented skb gets queued up on
> vif-oriented txq. The ieee80211_tx_dequeue() ends
> up looking at info->control.flags and tries to use
> txq->sta which in the fail case is NULL.
> 
> It's probably pointless to pretend one can
> downgrade skb from sta-txq to vif-txq.
> 
> Only drivers using wake_tx_queue were affected.
> 
> Example crash dump before fix:
> 
>  Unable to handle kernel paging request at virtual address ffffe26c
>  PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211]
>  [<bf4252a4>] (ieee80211_tx_dequeue [mac80211]) from
>  [<bf4b1388>] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core])
>  [<bf4b1388>] (ath10k_mac_tx_push_txq [ath10k_core]) from
>  [<bf4bdfbc>] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core])
>  [<bf4bdfbc>] (ath10k_htt_txrx_compl_task [ath10k_core])
>  [<bf51c5a4>] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci])
>  [<bf51c5a4>] (ath10k_pci_napi_poll [ath10k_pci]) from
>  [<c0572e90>] (net_rx_action+0xac/0x160)
> 
> Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
> Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>

> ---
>  net/mac80211/tx.c | 17 +++++++----------
>  1 file changed, 7 insertions(+), 10 deletions(-)
> 
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index 4dea18be385c..c77fcf83d004 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -1244,13 +1244,16 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
>  
>  static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
>  					  struct ieee80211_vif *vif,
> -					  struct ieee80211_sta *pubsta,
> +					  struct sta_info *sta,
>  					  struct sk_buff *skb)
>  {
>  	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
>  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
>  	struct ieee80211_txq *txq = NULL;
>  
> +	if (sta && !sta->uploaded)
> +		return NULL;
> +
>  	if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
>  	    (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
>  		return NULL;
> @@ -1258,10 +1261,10 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
>  	if (!ieee80211_is_data(hdr->frame_control))
>  		return NULL;
>  
> -	if (pubsta) {
> +	if (sta) {
>  		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
>  
> -		txq = pubsta->txq[tid];
> +		txq = sta->sta.txq[tid];
>  	} else if (vif) {
>  		txq = vif->txq;
>  	}
> @@ -1504,23 +1507,17 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
>  	struct fq *fq = &local->fq;
>  	struct ieee80211_vif *vif;
>  	struct txq_info *txqi;
> -	struct ieee80211_sta *pubsta;
>  
>  	if (!local->ops->wake_tx_queue ||
>  	    sdata->vif.type == NL80211_IFTYPE_MONITOR)
>  		return false;
>  
> -	if (sta && sta->uploaded)
> -		pubsta = &sta->sta;
> -	else
> -		pubsta = NULL;
> -
>  	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
>  		sdata = container_of(sdata->bss,
>  				     struct ieee80211_sub_if_data, u.ap);
>  
>  	vif = &sdata->vif;
> -	txqi = ieee80211_get_txq(local, vif, pubsta, skb);
> +	txqi = ieee80211_get_txq(local, vif, sta, skb);
>  
>  	if (!txqi)
>  		return false;
> -- 
> 2.1.4
>
Johannes Berg Jan. 12, 2017, 2:51 p.m. UTC | #2
On Thu, 2017-01-12 at 20:15 +0530, Mohammed Shafi Shajakhan wrote:
> 
> > Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
> > Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
> 
> Signed-off-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>

That makes no sense, you're not handling the patch in any way. You can
say Tested-by, or Acked-by if you like, but not S-o-b. See the DCO
(Documentation/SubmittingPatches)

johannes
Mohammed Shafi Shajakhan Jan. 12, 2017, 2:54 p.m. UTC | #3
On Thu, Jan 12, 2017 at 03:51:20PM +0100, Johannes Berg wrote:
> On Thu, 2017-01-12 at 20:15 +0530, Mohammed Shafi Shajakhan wrote:
> > 
> > > Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
> > > Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
> > 
> > Signed-off-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
> 
> That makes no sense, you're not handling the patch in any way. You can
> say Tested-by, or Acked-by if you like, but not S-o-b. See the DCO
> (Documentation/SubmittingPatches)
>
Apologies for that; I assumed we could add a Signed-off-by on the fly.
Felix Fietkau Jan. 12, 2017, 6:11 p.m. UTC | #4
On 2017-01-12 15:28, Michal Kazior wrote:
> Station structure is considered as not uploaded
> (to driver) until drv_sta_state() finishes. This
> call is however done after the structure is
> attached to mac80211 internal lists and hashes.
> This means mac80211 can lookup (and use) station
> structure before it is uploaded to a driver.
> 
> If this happens (structure exists, but
> sta->uploaded is false) fast_tx path can still be
> taken. Deep in the fastpath call the sta->uploaded
> is checked against to derive "pubsta" argument for
> ieee80211_get_txq(). If sta->uploaded is false
> (and sta is actually non-NULL) ieee80211_get_txq()
> effectively downgraded to vif->txq.
> 
> At first glance this may look innocent but coerces
> mac80211 into a state that is almost guaranteed
> (codel may drop offending skb) to crash because a
> station-oriented skb gets queued up on
> vif-oriented txq. The ieee80211_tx_dequeue() ends
> up looking at info->control.flags and tries to use
> txq->sta which in the fail case is NULL.
> 
> It's probably pointless to pretend one can
> downgrade skb from sta-txq to vif-txq.
> 
> Only drivers using wake_tx_queue were affected.
> 
> Example crash dump before fix:
> 
>  Unable to handle kernel paging request at virtual address ffffe26c
>  PC is at ieee80211_tx_dequeue+0x204/0x690 [mac80211]
>  [<bf4252a4>] (ieee80211_tx_dequeue [mac80211]) from
>  [<bf4b1388>] (ath10k_mac_tx_push_txq+0x54/0x1c0 [ath10k_core])
>  [<bf4b1388>] (ath10k_mac_tx_push_txq [ath10k_core]) from
>  [<bf4bdfbc>] (ath10k_htt_txrx_compl_task+0xd78/0x11d0 [ath10k_core])
>  [<bf4bdfbc>] (ath10k_htt_txrx_compl_task [ath10k_core])
>  [<bf51c5a4>] (ath10k_pci_napi_poll+0x54/0xe8 [ath10k_pci])
>  [<bf51c5a4>] (ath10k_pci_napi_poll [ath10k_pci]) from
>  [<c0572e90>] (net_rx_action+0xac/0x160)
> 
> Reported-by: Mohammed Shafi Shajakhan <mohammed@qti.qualcomm.com>
> Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Acked-by: Felix Fietkau <nbd@nbd.name>
Dave Taht Jan. 12, 2017, 7:05 p.m. UTC | #5
Yay! This sounds like a potential fix for this?

https://bugs.lede-project.org/index.php?do=details&task_id=368

Do all the ath10k chipsets excluded by commit

4ca1807815aa6801aaced7fdefa9edacc2521767

still need to be excluded?
Johannes Berg Jan. 13, 2017, 8:16 a.m. UTC | #6
On Thu, 2017-01-12 at 15:28 +0100, Michal Kazior wrote:
> Station structure is considered as not uploaded
> (to driver) until drv_sta_state() finishes. This
> call is however done after the structure is
> attached to mac80211 internal lists and hashes.
> This means mac80211 can lookup (and use) station
> structure before it is uploaded to a driver.
> 
> If this happens (structure exists, but
> sta->uploaded is false) fast_tx path can still be
> taken. Deep in the fastpath call the sta->uploaded
> is checked against to derive "pubsta" argument for
> ieee80211_get_txq(). If sta->uploaded is false
> (and sta is actually non-NULL) ieee80211_get_txq()
> effectively downgraded to vif->txq.
> 
> At first glance this may look innocent but coerces
> mac80211 into a state that is almost guaranteed
> (codel may drop offending skb) to crash because a
> station-oriented skb gets queued up on
> vif-oriented txq. The ieee80211_tx_dequeue() ends
> up looking at info->control.flags and tries to use
> txq->sta which in the fail case is NULL.
> 
> It's probably pointless to pretend one can
> downgrade skb from sta-txq to vif-txq.

Ok. I understand things until this point, more or less.

What I don't understand - and you haven't really described - is how the
changes fix it? Could you resend with a paragraph added that explains
that?

Also, you're adding a test:

>	if (sta && !sta->uploaded)

but couldn't you move that into the existing "if (sta)" block?
Everything before that only ever returns NULL anyway.

johannes
Michal Kazior Jan. 13, 2017, 9:04 a.m. UTC | #7
On 13 January 2017 at 09:16, Johannes Berg <johannes@sipsolutions.net> wrote:
> On Thu, 2017-01-12 at 15:28 +0100, Michal Kazior wrote:
>> Station structure is considered as not uploaded
>> (to driver) until drv_sta_state() finishes. This
>> call is however done after the structure is
>> attached to mac80211 internal lists and hashes.
>> This means mac80211 can lookup (and use) station
>> structure before it is uploaded to a driver.
>>
>> If this happens (structure exists, but
>> sta->uploaded is false) fast_tx path can still be
>> taken. Deep in the fastpath call the sta->uploaded
>> is checked against to derive "pubsta" argument for
>> ieee80211_get_txq(). If sta->uploaded is false
>> (and sta is actually non-NULL) ieee80211_get_txq()
>> effectively downgraded to vif->txq.
>>
>> At first glance this may look innocent but coerces
>> mac80211 into a state that is almost guaranteed
>> (codel may drop offending skb) to crash because a
>> station-oriented skb gets queued up on
>> vif-oriented txq. The ieee80211_tx_dequeue() ends
>> up looking at info->control.flags and tries to use
>> txq->sta which in the fail case is NULL.
>>
>> It's probably pointless to pretend one can
>> downgrade skb from sta-txq to vif-txq.
>
> Ok. I understand things until this point, more or less.
>
> What I don't understand - and you haven't really described - is how the
> changes fix it? Could you resend with a paragraph added that explains
> that?

"Since downgrading unicast traffic to vif->txq must not be done,
there's no txq to put a frame on if sta->uploaded is false. Therefore
the code is made to fall back to the regular tx() op path if the
described condition is hit." -- is this sufficient?


> Also, you're adding a test:
>
>>       if (sta && !sta->uploaded)
>
> but couldn't do move that into the existing "if (sta)" block?
> Everything before that only ever returns NULL anyway.

Good point. It makes more sense to put the sta->uploaded check in if
(sta) block. I'll move it.


Michał
Johannes Berg Jan. 13, 2017, 10:13 a.m. UTC | #8
> > What I don't understand - and you haven't really described - is how
> > the changes fix it? Could you resend with a paragraph added that
> > explains that?
> 
> "Since downgrading unicast traffic to vif->txq must not be done
> there's no txq to put a frame on if sta->uploaded is false. Therefore
> the code is made to fall back to regular tx() op path if the
> described condition is hit. " -- is this sufficient?

Makes sense.

> > Also, you're adding a test:
> > 
> > >       if (sta && !sta->uploaded)
> > 
> > but couldn't do move that into the existing "if (sta)" block?
> > Everything before that only ever returns NULL anyway.
> 
> Good point. It makes more sense to put the sta->uploaded check in if
> (sta) block. I'll move it.
> 

Ok, thanks.

johannes
diff mbox

Patch

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4dea18be385c..c77fcf83d004 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1244,13 +1244,16 @@  ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 
 static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 					  struct ieee80211_vif *vif,
-					  struct ieee80211_sta *pubsta,
+					  struct sta_info *sta,
 					  struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_txq *txq = NULL;
 
+	if (sta && !sta->uploaded)
+		return NULL;
+
 	if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
 	    (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
 		return NULL;
@@ -1258,10 +1261,10 @@  static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 	if (!ieee80211_is_data(hdr->frame_control))
 		return NULL;
 
-	if (pubsta) {
+	if (sta) {
 		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 
-		txq = pubsta->txq[tid];
+		txq = sta->sta.txq[tid];
 	} else if (vif) {
 		txq = vif->txq;
 	}
@@ -1504,23 +1507,17 @@  static bool ieee80211_queue_skb(struct ieee80211_local *local,
 	struct fq *fq = &local->fq;
 	struct ieee80211_vif *vif;
 	struct txq_info *txqi;
-	struct ieee80211_sta *pubsta;
 
 	if (!local->ops->wake_tx_queue ||
 	    sdata->vif.type == NL80211_IFTYPE_MONITOR)
 		return false;
 
-	if (sta && sta->uploaded)
-		pubsta = &sta->sta;
-	else
-		pubsta = NULL;
-
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
 				     struct ieee80211_sub_if_data, u.ap);
 
 	vif = &sdata->vif;
-	txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+	txqi = ieee80211_get_txq(local, vif, sta, skb);
 
 	if (!txqi)
 		return false;