diff mbox

[13/13] ath10k: implement push-pull tx

Message ID 1453384006-31907-14-git-send-email-michal.kazior@tieto.com (mailing list archive)
State Not Applicable
Delegated to: Kalle Valo
Headers show

Commit Message

Michal Kazior Jan. 21, 2016, 1:46 p.m. UTC
The current/old tx path design was that host, at
its own leisure, pushed tx frames to the device.
For HTT there was ~1000-1400 msdu queue depth.

After reaching that limit the driver would request
mac80211 to stop queues. There was little control
over what packets got in there as far as
DA/RA was considered so it was rather easy to
starve per-station traffic flows.

With MU-MIMO this became a significant problem
because the queue depth was insufficient to buffer
frames from multiple clients (which could have
different signal quality and capabilities) in an
efficient fashion.

Hence the new tx path in 10.4 was introduced: a
pull-push mode.

Firmware and host can share tx queue state via
DMA. The state is logically a 2 dimensional array
addressed via peer_id+tid pair. Each entry is a
counter (either number of bytes or packets. Host
keeps it updated and firmware uses it for
scheduling Tx pull requests to host.

This allows MU-MIMO to become a lot more effective
with 10+ clients.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
---
 drivers/net/wireless/ath/ath10k/core.h   |   1 +
 drivers/net/wireless/ath/ath10k/htt.h    |   6 ++
 drivers/net/wireless/ath/ath10k/htt_rx.c | 117 ++++++++++++++++++++++++++++---
 drivers/net/wireless/ath/ath10k/htt_tx.c |  39 ++++++++---
 drivers/net/wireless/ath/ath10k/mac.c    |  48 +++++++++++--
 drivers/net/wireless/ath/ath10k/mac.h    |   5 ++
 6 files changed, 192 insertions(+), 24 deletions(-)

Comments

Peter Oh Jan. 21, 2016, 5:40 p.m. UTC | #1
On 01/21/2016 05:46 AM, Michal Kazior wrote:
> The current/old tx path design was that host, at
> its own leisure, pushed tx frames to the device.
> For HTT there was ~1000-1400 msdu queue depth.
>
> After reaching that limit the driver would request
> mac80211 to stop queues. There was little control
> over what packets got in there as far as
> DA/RA was considered so it was rather easy to
> starve per-station traffic flows.
>
> With MU-MIMO this became a significant problem
> because the queue depth was insufficient to buffer
> frames from multiple clients (which could have
> different signal quality and capabilities) in an
> efficient fashion.
>
> Hence the new tx path in 10.4 was introduced: a
> pull-push mode.
>
> Firmware and host can share tx queue state via
> DMA. The state is logically a 2 dimensional array
> addressed via peer_id+tid pair. Each entry is a
> counter (either number of bytes or packets. Host
> keeps it updated and firmware uses it for
> scheduling Tx pull requests to host.
>
> This allows MU-MIMO to become a lot more effective
> with 10+ clients.
>
> Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
> ---
>   drivers/net/wireless/ath/ath10k/core.h   |   1 +
>   drivers/net/wireless/ath/ath10k/htt.h    |   6 ++
>   drivers/net/wireless/ath/ath10k/htt_rx.c | 117
> ++++++++++++++++++++++++++++---
>   drivers/net/wireless/ath/ath10k/htt_tx.c |  39 ++++++++---
>   drivers/net/wireless/ath/ath10k/mac.c    |  48 +++++++++++--
>   drivers/net/wireless/ath/ath10k/mac.h    |   5 ++
>   6 files changed, 192 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/core.h
> b/drivers/net/wireless/ath/ath10k/core.h
> index f51887c6be74..ab8cbccc0f4b 100644
> --- a/drivers/net/wireless/ath/ath10k/core.h
> +++ b/drivers/net/wireless/ath/ath10k/core.h
> @@ -307,6 +307,7 @@ struct ath10k_peer {
>   
>   struct ath10k_txq {
>   	unsigned long num_fw_queued;
> +	unsigned long num_push_allowed;
>   };
>   
>   struct ath10k_sta {
> diff --git a/drivers/net/wireless/ath/ath10k/htt.h
> b/drivers/net/wireless/ath/ath10k/htt.h
> index b1e40f44e76b..02cf55d306e8 100644
> --- a/drivers/net/wireless/ath/ath10k/htt.h
> +++ b/drivers/net/wireless/ath/ath10k/htt.h
> @@ -1652,6 +1652,7 @@ struct ath10k_htt {
>   	struct sk_buff_head tx_compl_q;
>   	struct sk_buff_head rx_compl_q;
>   	struct sk_buff_head rx_in_ord_compl_q;
> +	struct sk_buff_head tx_fetch_ind_q;
>   
>   	/* rx_status template */
>   	struct ieee80211_rx_status rx_status;
> @@ -1670,8 +1671,10 @@ struct ath10k_htt {
>   		bool enabled;
>   		struct htt_q_state *vaddr;
>   		dma_addr_t paddr;
> +		u16 num_push_allowed;
>   		u16 num_peers;
>   		u16 num_tids;
> +		enum htt_tx_mode_switch_mode mode;
>   		enum htt_q_depth_type type;
>   	} tx_q_state;
>   };
> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>   
>   void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
>   			      struct ieee80211_txq *txq);
> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
> +			      struct ieee80211_txq *txq);
> +void ath10k_htt_tx_txq_sync(struct ath10k *ar);
>   void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt,
>   			       bool is_mgmt);
>   int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt,
> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c
> b/drivers/net/wireless/ath/ath10k/htt_rx.c
> index 6e3d95c95568..827c8369b589 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt)
>   	skb_queue_purge(&htt->tx_compl_q);
>   	skb_queue_purge(&htt->rx_compl_q);
>   	skb_queue_purge(&htt->rx_in_ord_compl_q);
> +	skb_queue_purge(&htt->tx_fetch_ind_q);
>   
>   	ath10k_htt_rx_ring_free(htt);
>   
> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
>   	skb_queue_head_init(&htt->tx_compl_q);
>   	skb_queue_head_init(&htt->rx_compl_q);
>   	skb_queue_head_init(&htt->rx_in_ord_compl_q);
> +	skb_queue_head_init(&htt->tx_fetch_ind_q);
>   
>   	tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
>   		     (unsigned long)htt);
> @@ -2004,16 +2006,21 @@ static void
> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
>   
>   static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff
> *skb)
>   {
> +	struct ieee80211_hw *hw = ar->hw;
> +	struct ieee80211_txq *txq;
>   	struct htt_resp *resp = (struct htt_resp *)skb->data;
>   	struct htt_tx_fetch_record *record;
>   	size_t len;
>   	size_t max_num_bytes;
>   	size_t max_num_msdus;
> +	size_t num_bytes;
> +	size_t num_msdus;
>   	const __le32 *resp_ids;
>   	u16 num_records;
>   	u16 num_resp_ids;
>   	u16 peer_id;
>   	u8 tid;
> +	int ret;
>   	int i;
>   
>   	ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
> ath10k *ar, struct sk_buff *skb)
>   		   num_records, num_resp_ids,
>   		   le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
>   
> -	/* TODO: runtime sanity checks */
> +	if (!ar->htt.tx_q_state.enabled) {
> +		ath10k_warn(ar, "received unexpected tx_fetch_ind event:
> not enabled\n");
> +		return;
> +	}
> +
> +	if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
> +		ath10k_warn(ar, "received unexpected tx_fetch_ind event:
> in push mode\n");
> +		return;
> +	}
> +
> +	rcu_read_lock();
>   
>   	for (i = 0; i < num_records; i++) {
>   		record = &resp->tx_fetch_ind.records[i];
> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
> ath10k *ar, struct sk_buff *skb)
>   			continue;
>   		}
>   
> -		/* TODO: dequeue and submit tx to device */
> +		spin_lock_bh(&ar->data_lock);
> +		txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
> +		spin_unlock_bh(&ar->data_lock);
> +
> +		/* It is okay to release the lock and use txq because RCU
> read
> +		 * lock is held.
> +		 */
> +
> +		if (unlikely(!txq)) {
> +			ath10k_warn(ar, "failed to lookup txq for peer_id
> %hu tid %hhu\n",
> +				    peer_id, tid);
> +			continue;
> +		}
> +
> +		num_msdus = 0;
> +		num_bytes = 0;
> +
> +		while (num_msdus < max_num_msdus &&
> +		       num_bytes < max_num_bytes) {
> +			ret = ath10k_mac_tx_push_txq(hw, txq);
> +			if (ret < 0)
> +				break;
> +
> +			num_msdus++;
> +			num_bytes += ret;
> +		}
> +
> +		record->num_msdus = cpu_to_le16(num_msdus);
> +		record->num_bytes = cpu_to_le32(num_bytes);
> +
> +		ath10k_htt_tx_txq_recalc(hw, txq);
>   	}
>   
> +	rcu_read_unlock();
> +
>   	resp_ids =
> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
>   	ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids,
> num_resp_ids);
>   
> -	/* TODO: generate and send fetch response to device */
> +	ret = ath10k_htt_tx_fetch_resp(ar,
> +				       resp->tx_fetch_ind.token,
> +				       resp->tx_fetch_ind.fetch_seq_num,
> +				       resp->tx_fetch_ind.records,
> +				       num_records);
> +	if (unlikely(ret)) {
> +		ath10k_warn(ar, "failed to submit tx fetch resp for token
> 0x%08x: %d\n",
> +			    le32_to_cpu(resp->tx_fetch_ind.token), ret);
> +		/* FIXME: request fw restart */
> +	}
> +
> +	ath10k_htt_tx_txq_sync(ar);
>   }
>   
>   static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
> ath10k *ar,
>   {
>   	const struct htt_resp *resp = (void *)skb->data;
>   	const struct htt_tx_mode_switch_record *record;
> +	struct ieee80211_txq *txq;
> +	struct ath10k_txq *artxq;
>   	size_t len;
>   	size_t num_records;
>   	enum htt_tx_mode_switch_mode mode;
> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
> ath10k *ar,
>   	if (!enable)
>   		return;
>   
> -	/* TODO: apply configuration */
> +	ar->htt.tx_q_state.enabled = enable;
> +	ar->htt.tx_q_state.mode = mode;
> +	ar->htt.tx_q_state.num_push_allowed = threshold;
> +
> +	rcu_read_lock();
>   
>   	for (i = 0; i < num_records; i++) {
>   		record = &resp->tx_mode_switch_ind.records[i];
> @@ -2168,10 +2234,33 @@ static void
> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
>   			continue;
>   		}
>   
> -		/* TODO: apply configuration */
> +		spin_lock_bh(&ar->data_lock);
> +		txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
> +		spin_unlock_bh(&ar->data_lock);
> +
> +		/* It is okay to release the lock and use txq because RCU
> read
> +		 * lock is held.
> +		 */
> +
> +		if (unlikely(!txq)) {
> +			ath10k_warn(ar, "failed to lookup txq for peer_id
> %hu tid %hhu\n",
> +				    peer_id, tid);
> +			continue;
> +		}
> +
> +		spin_lock_bh(&ar->htt.tx_lock);
> +		artxq = (void *)txq->drv_priv;
> +		artxq->num_push_allowed =
> le16_to_cpu(record->num_max_msdus);
> +		spin_unlock_bh(&ar->htt.tx_lock);
>   	}
>   
> -	/* TODO: apply configuration */
> +	rcu_read_unlock();
> +
> +	spin_lock_bh(&ar->htt.tx_lock);
> +	ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
> +	spin_unlock_bh(&ar->htt.tx_lock);
> +
Isn't it proved it break Mesh from working?
> +	ath10k_mac_tx_push_pending(ar);
>   }
>   
>   void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
> @@ -2317,8 +2406,9 @@ void ath10k_htt_t2h_msg_handler(struct ath10k *ar,
> struct sk_buff *skb)
>   	case HTT_T2H_MSG_TYPE_AGGR_CONF:
>   		break;
>   	case HTT_T2H_MSG_TYPE_TX_FETCH_IND:
> -		ath10k_htt_rx_tx_fetch_ind(ar, skb);
> -		break;
> +		skb_queue_tail(&htt->tx_fetch_ind_q, skb);
> +		tasklet_schedule(&htt->txrx_compl_task);
> +		return;
>   	case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
>   		ath10k_htt_rx_tx_fetch_confirm(ar, skb);
>   		break;
> @@ -2356,21 +2446,32 @@ static void ath10k_htt_txrx_compl_task(unsigned
> long ptr)
>   	struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
>   	struct ath10k *ar = htt->ar;
>   	struct sk_buff_head tx_q;
> +	struct sk_buff_head tx_ind_q;
>   	struct htt_resp *resp;
>   	struct sk_buff *skb;
>   	unsigned long flags;
>   
>   	__skb_queue_head_init(&tx_q);
> +	__skb_queue_head_init(&tx_ind_q);
>   
>   	spin_lock_irqsave(&htt->tx_compl_q.lock, flags);
>   	skb_queue_splice_init(&htt->tx_compl_q, &tx_q);
>   	spin_unlock_irqrestore(&htt->tx_compl_q.lock, flags);
>   
> +	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
> +	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
> +	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
> +
>   	while ((skb = __skb_dequeue(&tx_q))) {
>   		ath10k_htt_rx_frm_tx_compl(htt->ar, skb);
>   		dev_kfree_skb_any(skb);
>   	}
>   
> +	while ((skb = __skb_dequeue(&tx_ind_q))) {
> +		ath10k_htt_rx_tx_fetch_ind(ar, skb);
> +		dev_kfree_skb_any(skb);
> +	}
> +
>   	ath10k_mac_tx_push_pending(ar);
>   
>   	spin_lock_bh(&htt->rx_ring.lock);
> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c
> b/drivers/net/wireless/ath/ath10k/htt_tx.c
> index 5ef0a1b3773c..00877ff6861e 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
> @@ -62,6 +62,9 @@ static void __ath10k_htt_tx_txq_recalc(struct
> ieee80211_hw *hw,
>   	if (!ar->htt.tx_q_state.enabled)
>   		return;
>   
> +	if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
> +		return;
> +
>   	if (txq->sta)
>   		peer_id = arsta->peer_id;
>   	else
> @@ -97,6 +100,9 @@ static void __ath10k_htt_tx_txq_sync(struct ath10k *ar)
>   	if (!ar->htt.tx_q_state.enabled)
>   		return;
>   
> +	if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
> +		return;
> +
>   	seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq);
>   	seq++;
>   	ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq);
> @@ -111,6 +117,23 @@ static void __ath10k_htt_tx_txq_sync(struct ath10k
> *ar)
>   				   DMA_TO_DEVICE);
>   }
>   
> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
> +			      struct ieee80211_txq *txq)
> +{
> +	struct ath10k *ar = hw->priv;
> +
> +	spin_lock_bh(&ar->htt.tx_lock);
> +	__ath10k_htt_tx_txq_recalc(hw, txq);
> +	spin_unlock_bh(&ar->htt.tx_lock);
> +}
> +
> +void ath10k_htt_tx_txq_sync(struct ath10k *ar)
> +{
> +	spin_lock_bh(&ar->htt.tx_lock);
> +	__ath10k_htt_tx_txq_sync(ar);
> +	spin_unlock_bh(&ar->htt.tx_lock);
> +}
> +
>   void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
>   			      struct ieee80211_txq *txq)
>   {
> @@ -634,10 +657,14 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>   {
>   	struct sk_buff *skb;
>   	struct htt_cmd *cmd;
> -	u16 resp_id;
> +	const u16 resp_id = 0;
>   	int len = 0;
>   	int ret;
>   
> +	/* Response IDs are echo-ed back only for host driver convienence
> +	 * purposes. They aren't used for anything in the driver yet so
> use 0.
> +	 */
> +
>   	len += sizeof(cmd->hdr);
>   	len += sizeof(cmd->tx_fetch_resp);
>   	len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records;
> @@ -646,11 +673,6 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>   	if (!skb)
>   		return -ENOMEM;
>   
> -	resp_id = 0; /* TODO: allocate resp_id */
> -	ret = 0;
> -	if (ret)
> -		goto err_free_skb;
> -
>   	skb_put(skb, len);
>   	cmd = (struct htt_cmd *)skb->data;
>   	cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP;
> @@ -665,14 +687,11 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>   	ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb);
>   	if (ret) {
>   		ath10k_warn(ar, "failed to submit htc command: %d\n",
> ret);
> -		goto err_free_resp_id;
> +		goto err_free_skb;
>   	}
>   
>   	return 0;
>   
> -err_free_resp_id:
> -	(void)resp_id; /* TODO: free resp_id */
> -
>   err_free_skb:
>   	dev_kfree_skb_any(skb);
>   
> diff --git a/drivers/net/wireless/ath/ath10k/mac.c
> b/drivers/net/wireless/ath/ath10k/mac.c
> index 0dc16de1d9fb..ee94c83b5128 100644
> --- a/drivers/net/wireless/ath/ath10k/mac.c
> +++ b/drivers/net/wireless/ath/ath10k/mac.c
> @@ -3629,11 +3629,25 @@ void ath10k_mgmt_over_wmi_tx_work(struct
> work_struct *work)
>   static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw,
>   				   struct ieee80211_txq *txq)
>   {
> -	return 1; /* TBD */
> +	struct ath10k *ar = hw->priv;
> +	struct ath10k_txq *artxq = (void *)txq->drv_priv;
> +
> +	/* No need to get locks */
> +
> +	if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH)
> +		return true;
> +
> +	if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed)
> +		return true;
> +
> +	if (artxq->num_fw_queued < artxq->num_push_allowed)
> +		return true;
> +
> +	return false;
>   }
>   
> -static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> -				  struct ieee80211_txq *txq)
> +int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> +			   struct ieee80211_txq *txq)
>   {
>   	const bool is_mgmt = false;
>   	const bool is_presp = false;
> @@ -3645,6 +3659,7 @@ static int ath10k_mac_tx_push_txq(struct
> ieee80211_hw *hw,
>   	enum ath10k_hw_txrx_mode txmode;
>   	enum ath10k_mac_tx_path txpath;
>   	struct sk_buff *skb;
> +	size_t skb_len;
>   	int ret;
>   
>   	spin_lock_bh(&ar->htt.tx_lock);
> @@ -3665,6 +3680,7 @@ static int ath10k_mac_tx_push_txq(struct
> ieee80211_hw *hw,
>   
>   	ath10k_mac_tx_h_fill_cb(ar, vif, txq, skb);
>   
> +	skb_len = skb->len;
>   	txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb);
>   	txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode);
>   
> @@ -3683,7 +3699,7 @@ static int ath10k_mac_tx_push_txq(struct
> ieee80211_hw *hw,
>   	artxq->num_fw_queued++;
>   	spin_unlock_bh(&ar->htt.tx_lock);
>   
> -	return 0;
> +	return skb_len;
>   }
>   
>   static void ath10k_mac_tx_push(struct ieee80211_hw *hw,
> @@ -3693,7 +3709,7 @@ static void ath10k_mac_tx_push(struct ieee80211_hw
> *hw,
>   	int ret;
>   
>   	ret = ath10k_mac_tx_push_txq(hw, txq);
> -	if (unlikely(ret)) {
> +	if (unlikely(ret < 0)) {
>   		if (txq->sta)
>   			ath10k_warn(ar, "failed to push tx to station %pM
> tid %hhu: %d\n",
>   				    txq->sta->addr, txq->tid, ret);
> @@ -3738,7 +3754,7 @@ static void ath10k_mac_tx_push_pending_txq(struct
> ieee80211_hw *hw,
>   			break;
>   
>   		ret = ath10k_mac_tx_push_txq(hw, txq);
> -		if (ret)
> +		if (ret < 0)
>   			break;
>   	}
>   
> @@ -3820,6 +3836,26 @@ static void ath10k_mac_txq_unref(struct ath10k *ar,
>   	spin_unlock_bh(&ar->htt.tx_lock);
>   }
>   
> +struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
> +					    u16 peer_id,
> +					    u8 tid)
> +{
> +	struct ath10k_peer *peer;
> +
> +	lockdep_assert_held(&ar->data_lock);
> +
> +	peer = ar->peer_map[peer_id];
> +	if (!peer)
> +		return NULL;
> +
> +	if (peer->sta)
> +		return peer->sta->txq[tid];
> +	else if (peer->vif)
> +		return peer->vif->txq;
> +	else
> +		return NULL;
> +}
> +
>   /************/
>   /* Scanning */
>   /************/
> diff --git a/drivers/net/wireless/ath/ath10k/mac.h
> b/drivers/net/wireless/ath/ath10k/mac.h
> index 453f606a250e..2c3327beb445 100644
> --- a/drivers/net/wireless/ath/ath10k/mac.h
> +++ b/drivers/net/wireless/ath/ath10k/mac.h
> @@ -76,6 +76,11 @@ void ath10k_mac_vif_tx_lock(struct ath10k_vif *arvif,
> int reason);
>   void ath10k_mac_vif_tx_unlock(struct ath10k_vif *arvif, int reason);
>   bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar);
>   void ath10k_mac_tx_push_pending(struct ath10k *ar);
> +int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> +			   struct ieee80211_txq *txq);
> +struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
> +					    u16 peer_id,
> +					    u8 tid);
>   
>   static inline struct ath10k_vif *ath10k_vif_to_arvif(struct ieee80211_vif
> *vif)
>   {

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michal Kazior Jan. 22, 2016, 7:47 a.m. UTC | #2
On 21 January 2016 at 18:40, Peter Oh <poh@codeaurora.org> wrote:
>
> On 01/21/2016 05:46 AM, Michal Kazior wrote:
>>
>> The current/old tx path design was that host, at
>> its own leisure, pushed tx frames to the device.
>> For HTT there was ~1000-1400 msdu queue depth.
>>
>> After reaching that limit the driver would request
>> mac80211 to stop queues. There was little control
>> over what packets got in there as far as
>> DA/RA was considered so it was rather easy to
>> starve per-station traffic flows.
>>
>> With MU-MIMO this became a significant problem
>> because the queue depth was insufficient to buffer
>> frames from multiple clients (which could have
>> different signal quality and capabilities) in an
>> efficient fashion.
>>
>> Hence the new tx path in 10.4 was introduced: a
>> pull-push mode.
>>
>> Firmware and host can share tx queue state via
>> DMA. The state is logically a 2 dimensional array
>> addressed via peer_id+tid pair. Each entry is a
>> counter (either number of bytes or packets. Host
>> keeps it updated and firmware uses it for
>> scheduling Tx pull requests to host.
>>
>> This allows MU-MIMO to become a lot more effective
>> with 10+ clients.
>>
>> Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
>> ---
>>   drivers/net/wireless/ath/ath10k/core.h   |   1 +
>>   drivers/net/wireless/ath/ath10k/htt.h    |   6 ++
>>   drivers/net/wireless/ath/ath10k/htt_rx.c | 117
>> ++++++++++++++++++++++++++++---
>>   drivers/net/wireless/ath/ath10k/htt_tx.c |  39 ++++++++---
>>   drivers/net/wireless/ath/ath10k/mac.c    |  48 +++++++++++--
>>   drivers/net/wireless/ath/ath10k/mac.h    |   5 ++
>>   6 files changed, 192 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath10k/core.h
>> b/drivers/net/wireless/ath/ath10k/core.h
>> index f51887c6be74..ab8cbccc0f4b 100644
>> --- a/drivers/net/wireless/ath/ath10k/core.h
>> +++ b/drivers/net/wireless/ath/ath10k/core.h
>> @@ -307,6 +307,7 @@ struct ath10k_peer {
>>     struct ath10k_txq {
>>         unsigned long num_fw_queued;
>> +       unsigned long num_push_allowed;
>>   };
>>     struct ath10k_sta {
>> diff --git a/drivers/net/wireless/ath/ath10k/htt.h
>> b/drivers/net/wireless/ath/ath10k/htt.h
>> index b1e40f44e76b..02cf55d306e8 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt.h
>> +++ b/drivers/net/wireless/ath/ath10k/htt.h
>> @@ -1652,6 +1652,7 @@ struct ath10k_htt {
>>         struct sk_buff_head tx_compl_q;
>>         struct sk_buff_head rx_compl_q;
>>         struct sk_buff_head rx_in_ord_compl_q;
>> +       struct sk_buff_head tx_fetch_ind_q;
>>         /* rx_status template */
>>         struct ieee80211_rx_status rx_status;
>> @@ -1670,8 +1671,10 @@ struct ath10k_htt {
>>                 bool enabled;
>>                 struct htt_q_state *vaddr;
>>                 dma_addr_t paddr;
>> +               u16 num_push_allowed;
>>                 u16 num_peers;
>>                 u16 num_tids;
>> +               enum htt_tx_mode_switch_mode mode;
>>                 enum htt_q_depth_type type;
>>         } tx_q_state;
>>   };
>> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>>     void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
>>                               struct ieee80211_txq *txq);
>> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
>> +                             struct ieee80211_txq *txq);
>> +void ath10k_htt_tx_txq_sync(struct ath10k *ar);
>>   void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt,
>>                                bool is_mgmt);
>>   int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt,
>> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> index 6e3d95c95568..827c8369b589 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt)
>>         skb_queue_purge(&htt->tx_compl_q);
>>         skb_queue_purge(&htt->rx_compl_q);
>>         skb_queue_purge(&htt->rx_in_ord_compl_q);
>> +       skb_queue_purge(&htt->tx_fetch_ind_q);
>>         ath10k_htt_rx_ring_free(htt);
>>   @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
>>         skb_queue_head_init(&htt->tx_compl_q);
>>         skb_queue_head_init(&htt->rx_compl_q);
>>         skb_queue_head_init(&htt->rx_in_ord_compl_q);
>> +       skb_queue_head_init(&htt->tx_fetch_ind_q);
>>         tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
>>                      (unsigned long)htt);
>> @@ -2004,16 +2006,21 @@ static void
>> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
>>     static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct
>> sk_buff
>> *skb)
>>   {
>> +       struct ieee80211_hw *hw = ar->hw;
>> +       struct ieee80211_txq *txq;
>>         struct htt_resp *resp = (struct htt_resp *)skb->data;
>>         struct htt_tx_fetch_record *record;
>>         size_t len;
>>         size_t max_num_bytes;
>>         size_t max_num_msdus;
>> +       size_t num_bytes;
>> +       size_t num_msdus;
>>         const __le32 *resp_ids;
>>         u16 num_records;
>>         u16 num_resp_ids;
>>         u16 peer_id;
>>         u8 tid;
>> +       int ret;
>>         int i;
>>         ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
>> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
>> ath10k *ar, struct sk_buff *skb)
>>                    num_records, num_resp_ids,
>>                    le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
>>   -     /* TODO: runtime sanity checks */
>> +       if (!ar->htt.tx_q_state.enabled) {
>> +               ath10k_warn(ar, "received unexpected tx_fetch_ind event:
>> not enabled\n");
>> +               return;
>> +       }
>> +
>> +       if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
>> +               ath10k_warn(ar, "received unexpected tx_fetch_ind event:
>> in push mode\n");
>> +               return;
>> +       }
>> +
>> +       rcu_read_lock();
>>         for (i = 0; i < num_records; i++) {
>>                 record = &resp->tx_fetch_ind.records[i];
>> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
>> ath10k *ar, struct sk_buff *skb)
>>                         continue;
>>                 }
>>   -             /* TODO: dequeue and submit tx to device */
>> +               spin_lock_bh(&ar->data_lock);
>> +               txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
>> +               spin_unlock_bh(&ar->data_lock);
>> +
>> +               /* It is okay to release the lock and use txq because RCU
>> read
>> +                * lock is held.
>> +                */
>> +
>> +               if (unlikely(!txq)) {
>> +                       ath10k_warn(ar, "failed to lookup txq for peer_id
>> %hu tid %hhu\n",
>> +                                   peer_id, tid);
>> +                       continue;
>> +               }
>> +
>> +               num_msdus = 0;
>> +               num_bytes = 0;
>> +
>> +               while (num_msdus < max_num_msdus &&
>> +                      num_bytes < max_num_bytes) {
>> +                       ret = ath10k_mac_tx_push_txq(hw, txq);
>> +                       if (ret < 0)
>> +                               break;
>> +
>> +                       num_msdus++;
>> +                       num_bytes += ret;
>> +               }
>> +
>> +               record->num_msdus = cpu_to_le16(num_msdus);
>> +               record->num_bytes = cpu_to_le32(num_bytes);
>> +
>> +               ath10k_htt_tx_txq_recalc(hw, txq);
>>         }
>>   +     rcu_read_unlock();
>> +
>>         resp_ids =
>> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
>>         ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids,
>> num_resp_ids);
>>   -     /* TODO: generate and send fetch response to device */
>> +       ret = ath10k_htt_tx_fetch_resp(ar,
>> +                                      resp->tx_fetch_ind.token,
>> +                                      resp->tx_fetch_ind.fetch_seq_num,
>> +                                      resp->tx_fetch_ind.records,
>> +                                      num_records);
>> +       if (unlikely(ret)) {
>> +               ath10k_warn(ar, "failed to submit tx fetch resp for token
>> 0x%08x: %d\n",
>> +                           le32_to_cpu(resp->tx_fetch_ind.token), ret);
>> +               /* FIXME: request fw restart */
>> +       }
>> +
>> +       ath10k_htt_tx_txq_sync(ar);
>>   }
>>     static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
>> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
>> ath10k *ar,
>>   {
>>         const struct htt_resp *resp = (void *)skb->data;
>>         const struct htt_tx_mode_switch_record *record;
>> +       struct ieee80211_txq *txq;
>> +       struct ath10k_txq *artxq;
>>         size_t len;
>>         size_t num_records;
>>         enum htt_tx_mode_switch_mode mode;
>> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
>> ath10k *ar,
>>         if (!enable)
>>                 return;
>>   -     /* TODO: apply configuration */
>> +       ar->htt.tx_q_state.enabled = enable;
>> +       ar->htt.tx_q_state.mode = mode;
>> +       ar->htt.tx_q_state.num_push_allowed = threshold;
>> +
>> +       rcu_read_lock();
>>         for (i = 0; i < num_records; i++) {
>>                 record = &resp->tx_mode_switch_ind.records[i];
>> @@ -2168,10 +2234,33 @@ static void
>> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
>>                         continue;
>>                 }
>>   -             /* TODO: apply configuration */
>> +               spin_lock_bh(&ar->data_lock);
>> +               txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
>> +               spin_unlock_bh(&ar->data_lock);
>> +
>> +               /* It is okay to release the lock and use txq because RCU
>> read
>> +                * lock is held.
>> +                */
>> +
>> +               if (unlikely(!txq)) {
>> +                       ath10k_warn(ar, "failed to lookup txq for peer_id
>> %hu tid %hhu\n",
>> +                                   peer_id, tid);
>> +                       continue;
>> +               }
>> +
>> +               spin_lock_bh(&ar->htt.tx_lock);
>> +               artxq = (void *)txq->drv_priv;
>> +               artxq->num_push_allowed =
>> le16_to_cpu(record->num_max_msdus);
>> +               spin_unlock_bh(&ar->htt.tx_lock);
>>         }
>>   -     /* TODO: apply configuration */
>> +       rcu_read_unlock();
>> +
>> +       spin_lock_bh(&ar->htt.tx_lock);
>> +       ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
>> +       spin_unlock_bh(&ar->htt.tx_lock);
>> +
>
> Isn't it proved it break Mesh from working?

Yes, good point. I'm still working on this - I should've mentioned
that in the cover letter.

Nonetheless I wanted to get the review process going for this patchset.

I'll address the mesh problem in v2.


Micha?
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michal Kazior Jan. 26, 2016, 10:28 a.m. UTC | #3
On 22 January 2016 at 08:47, Michal Kazior <michal.kazior@tieto.com> wrote:
> On 21 January 2016 at 18:40, Peter Oh <poh@codeaurora.org> wrote:
>> On 01/21/2016 05:46 AM, Michal Kazior wrote:
[...]
>>>   -     /* TODO: apply configuration */
>>> +       rcu_read_unlock();
>>> +
>>> +       spin_lock_bh(&ar->htt.tx_lock);
>>> +       ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
>>> +       spin_unlock_bh(&ar->htt.tx_lock);
>>> +
>>
>> Isn't it proved it break Mesh from working?
>
> Yes, good point. I'm still working on this - I should've mentioned
> that in the cover letter.
>
> Nonetheless I wanted to get the review process going for this patchset.
>
> I'll address the mesh problem in v2.

For the record - the problem turned out to be a bug in mac80211 mesh
fwding, see:

  https://patchwork.kernel.org/patch/8108711/


Micha?
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Oh Jan. 26, 2016, 7:03 p.m. UTC | #4
On 01/26/2016 02:28 AM, Michal Kazior wrote:
> On 22 January 2016 at 08:47, Michal Kazior <michal.kazior@tieto.com> wrote:
>> On 21 January 2016 at 18:40, Peter Oh <poh@codeaurora.org> wrote:
>>> On 01/21/2016 05:46 AM, Michal Kazior wrote:
> [...]
>>>>    -     /* TODO: apply configuration */
>>>> +       rcu_read_unlock();
>>>> +
>>>> +       spin_lock_bh(&ar->htt.tx_lock);
>>>> +       ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
>>>> +       spin_unlock_bh(&ar->htt.tx_lock);
>>>> +
>>> Isn't it proved it break Mesh from working?
>> Yes, good point. I'm still working on this - I should've mentioned
>> that in the cover letter.
>>
>> Nonetheless I wanted to get the review process going for this patchset.
>>
>> I'll address the mesh problem in v2.
> For the record - the problem turned out to be a bug in mac80211 mesh
> fwding, see:
>
>    https://patchwork.kernel.org/patch/8108711/
I've verified that the patch fixed the mesh issue.
Thank you Michal.
>
>
> Micha?

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kalle Valo Jan. 28, 2016, 8:36 a.m. UTC | #5
Peter Oh <poh@codeaurora.org> writes:

>> +	spin_lock_bh(&ar->htt.tx_lock);
>> +	ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
>> +	spin_unlock_bh(&ar->htt.tx_lock);
>> +
>
> Isn't it proved it break Mesh from working?

Please think of others and edit your quotes, it was annoying to find
this oneliner from a 500+ line message.
diff mbox

Patch

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index f51887c6be74..ab8cbccc0f4b 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -307,6 +307,7 @@  struct ath10k_peer {
 
 struct ath10k_txq {
 	unsigned long num_fw_queued;
+	unsigned long num_push_allowed;
 };
 
 struct ath10k_sta {
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index b1e40f44e76b..02cf55d306e8 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -1652,6 +1652,7 @@  struct ath10k_htt {
 	struct sk_buff_head tx_compl_q;
 	struct sk_buff_head rx_compl_q;
 	struct sk_buff_head rx_in_ord_compl_q;
+	struct sk_buff_head tx_fetch_ind_q;
 
 	/* rx_status template */
 	struct ieee80211_rx_status rx_status;
@@ -1670,8 +1671,10 @@  struct ath10k_htt {
 		bool enabled;
 		struct htt_q_state *vaddr;
 		dma_addr_t paddr;
+		u16 num_push_allowed;
 		u16 num_peers;
 		u16 num_tids;
+		enum htt_tx_mode_switch_mode mode;
 		enum htt_q_depth_type type;
 	} tx_q_state;
 };
@@ -1761,6 +1764,9 @@  int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
 
 void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq);
+void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq);
+void ath10k_htt_tx_txq_sync(struct ath10k *ar);
 void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt,
 			       bool is_mgmt);
 int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt,
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 6e3d95c95568..827c8369b589 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -229,6 +229,7 @@  void ath10k_htt_rx_free(struct ath10k_htt *htt)
 	skb_queue_purge(&htt->tx_compl_q);
 	skb_queue_purge(&htt->rx_compl_q);
 	skb_queue_purge(&htt->rx_in_ord_compl_q);
+	skb_queue_purge(&htt->tx_fetch_ind_q);
 
 	ath10k_htt_rx_ring_free(htt);
 
@@ -569,6 +570,7 @@  int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
 	skb_queue_head_init(&htt->tx_compl_q);
 	skb_queue_head_init(&htt->rx_compl_q);
 	skb_queue_head_init(&htt->rx_in_ord_compl_q);
+	skb_queue_head_init(&htt->tx_fetch_ind_q);
 
 	tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
 		     (unsigned long)htt);
@@ -2004,16 +2006,21 @@  static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
 
 static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
 {
+	struct ieee80211_hw *hw = ar->hw;
+	struct ieee80211_txq *txq;
 	struct htt_resp *resp = (struct htt_resp *)skb->data;
 	struct htt_tx_fetch_record *record;
 	size_t len;
 	size_t max_num_bytes;
 	size_t max_num_msdus;
+	size_t num_bytes;
+	size_t num_msdus;
 	const __le32 *resp_ids;
 	u16 num_records;
 	u16 num_resp_ids;
 	u16 peer_id;
 	u8 tid;
+	int ret;
 	int i;
 
 	ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
@@ -2039,7 +2046,17 @@  static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
 		   num_records, num_resp_ids,
 		   le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
 
-	/* TODO: runtime sanity checks */
+	if (!ar->htt.tx_q_state.enabled) {
+		ath10k_warn(ar, "received unexpected tx_fetch_ind event: not enabled\n");
+		return;
+	}
+
+	if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
+		ath10k_warn(ar, "received unexpected tx_fetch_ind event: in push mode\n");
+		return;
+	}
+
+	rcu_read_lock();
 
 	for (i = 0; i < num_records; i++) {
 		record = &resp->tx_fetch_ind.records[i];
@@ -2060,13 +2077,56 @@  static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
 			continue;
 		}
 
-		/* TODO: dequeue and submit tx to device */
+		spin_lock_bh(&ar->data_lock);
+		txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
+		spin_unlock_bh(&ar->data_lock);
+
+		/* It is okay to release the lock and use txq because RCU read
+		 * lock is held.
+		 */
+
+		if (unlikely(!txq)) {
+			ath10k_warn(ar, "failed to lookup txq for peer_id %hu tid %hhu\n",
+				    peer_id, tid);
+			continue;
+		}
+
+		num_msdus = 0;
+		num_bytes = 0;
+
+		while (num_msdus < max_num_msdus &&
+		       num_bytes < max_num_bytes) {
+			ret = ath10k_mac_tx_push_txq(hw, txq);
+			if (ret < 0)
+				break;
+
+			num_msdus++;
+			num_bytes += ret;
+		}
+
+		record->num_msdus = cpu_to_le16(num_msdus);
+		record->num_bytes = cpu_to_le32(num_bytes);
+
+		ath10k_htt_tx_txq_recalc(hw, txq);
 	}
 
+	rcu_read_unlock();
+
 	resp_ids = ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
 	ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, num_resp_ids);
 
-	/* TODO: generate and send fetch response to device */
+	ret = ath10k_htt_tx_fetch_resp(ar,
+				       resp->tx_fetch_ind.token,
+				       resp->tx_fetch_ind.fetch_seq_num,
+				       resp->tx_fetch_ind.records,
+				       num_records);
+	if (unlikely(ret)) {
+		ath10k_warn(ar, "failed to submit tx fetch resp for token 0x%08x: %d\n",
+			    le32_to_cpu(resp->tx_fetch_ind.token), ret);
+		/* FIXME: request fw restart */
+	}
+
+	ath10k_htt_tx_txq_sync(ar);
 }
 
 static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
@@ -2102,6 +2162,8 @@  static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
 {
 	const struct htt_resp *resp = (void *)skb->data;
 	const struct htt_tx_mode_switch_record *record;
+	struct ieee80211_txq *txq;
+	struct ath10k_txq *artxq;
 	size_t len;
 	size_t num_records;
 	enum htt_tx_mode_switch_mode mode;
@@ -2153,7 +2215,11 @@  static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
 	if (!enable)
 		return;
 
-	/* TODO: apply configuration */
+	ar->htt.tx_q_state.enabled = enable;
+	ar->htt.tx_q_state.mode = mode;
+	ar->htt.tx_q_state.num_push_allowed = threshold;
+
+	rcu_read_lock();
 
 	for (i = 0; i < num_records; i++) {
 		record = &resp->tx_mode_switch_ind.records[i];
@@ -2168,10 +2234,33 @@  static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
 			continue;
 		}
 
-		/* TODO: apply configuration */
+		spin_lock_bh(&ar->data_lock);
+		txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
+		spin_unlock_bh(&ar->data_lock);
+
+		/* It is okay to release the lock and use txq because RCU read
+		 * lock is held.
+		 */
+
+		if (unlikely(!txq)) {
+			ath10k_warn(ar, "failed to lookup txq for peer_id %hu tid %hhu\n",
+				    peer_id, tid);
+			continue;
+		}
+
+		spin_lock_bh(&ar->htt.tx_lock);
+		artxq = (void *)txq->drv_priv;
+		artxq->num_push_allowed = le16_to_cpu(record->num_max_msdus);
+		spin_unlock_bh(&ar->htt.tx_lock);
 	}
 
-	/* TODO: apply configuration */
+	rcu_read_unlock();
+
+	spin_lock_bh(&ar->htt.tx_lock);
+	ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
+	spin_unlock_bh(&ar->htt.tx_lock);
+
+	ath10k_mac_tx_push_pending(ar);
 }
 
 void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
@@ -2317,8 +2406,9 @@  void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 	case HTT_T2H_MSG_TYPE_AGGR_CONF:
 		break;
 	case HTT_T2H_MSG_TYPE_TX_FETCH_IND:
-		ath10k_htt_rx_tx_fetch_ind(ar, skb);
-		break;
+		skb_queue_tail(&htt->tx_fetch_ind_q, skb);
+		tasklet_schedule(&htt->txrx_compl_task);
+		return;
 	case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
 		ath10k_htt_rx_tx_fetch_confirm(ar, skb);
 		break;
@@ -2356,21 +2446,32 @@  static void ath10k_htt_txrx_compl_task(unsigned long ptr)
 	struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
 	struct ath10k *ar = htt->ar;
 	struct sk_buff_head tx_q;
+	struct sk_buff_head tx_ind_q;
 	struct htt_resp *resp;
 	struct sk_buff *skb;
 	unsigned long flags;
 
 	__skb_queue_head_init(&tx_q);
+	__skb_queue_head_init(&tx_ind_q);
 
 	spin_lock_irqsave(&htt->tx_compl_q.lock, flags);
 	skb_queue_splice_init(&htt->tx_compl_q, &tx_q);
 	spin_unlock_irqrestore(&htt->tx_compl_q.lock, flags);
 
+	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
+	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
+	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+
 	while ((skb = __skb_dequeue(&tx_q))) {
 		ath10k_htt_rx_frm_tx_compl(htt->ar, skb);
 		dev_kfree_skb_any(skb);
 	}
 
+	while ((skb = __skb_dequeue(&tx_ind_q))) {
+		ath10k_htt_rx_tx_fetch_ind(ar, skb);
+		dev_kfree_skb_any(skb);
+	}
+
 	ath10k_mac_tx_push_pending(ar);
 
 	spin_lock_bh(&htt->rx_ring.lock);
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 5ef0a1b3773c..00877ff6861e 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -62,6 +62,9 @@  static void __ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
 	if (!ar->htt.tx_q_state.enabled)
 		return;
 
+	if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
+		return;
+
 	if (txq->sta)
 		peer_id = arsta->peer_id;
 	else
@@ -97,6 +100,9 @@  static void __ath10k_htt_tx_txq_sync(struct ath10k *ar)
 	if (!ar->htt.tx_q_state.enabled)
 		return;
 
+	if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
+		return;
+
 	seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq);
 	seq++;
 	ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq);
@@ -111,6 +117,23 @@  static void __ath10k_htt_tx_txq_sync(struct ath10k *ar)
 				   DMA_TO_DEVICE);
 }
 
+void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq)
+{
+	struct ath10k *ar = hw->priv;
+
+	spin_lock_bh(&ar->htt.tx_lock);
+	__ath10k_htt_tx_txq_recalc(hw, txq);
+	spin_unlock_bh(&ar->htt.tx_lock);
+}
+
+void ath10k_htt_tx_txq_sync(struct ath10k *ar)
+{
+	spin_lock_bh(&ar->htt.tx_lock);
+	__ath10k_htt_tx_txq_sync(ar);
+	spin_unlock_bh(&ar->htt.tx_lock);
+}
+
 void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq)
 {
@@ -634,10 +657,14 @@  int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
 {
 	struct sk_buff *skb;
 	struct htt_cmd *cmd;
-	u16 resp_id;
+	const u16 resp_id = 0;
 	int len = 0;
 	int ret;
 
+	/* Response IDs are echo-ed back only for host driver convienence
+	 * purposes. They aren't used for anything in the driver yet so use 0.
+	 */
+
 	len += sizeof(cmd->hdr);
 	len += sizeof(cmd->tx_fetch_resp);
 	len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records;
@@ -646,11 +673,6 @@  int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
 	if (!skb)
 		return -ENOMEM;
 
-	resp_id = 0; /* TODO: allocate resp_id */
-	ret = 0;
-	if (ret)
-		goto err_free_skb;
-
 	skb_put(skb, len);
 	cmd = (struct htt_cmd *)skb->data;
 	cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP;
@@ -665,14 +687,11 @@  int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
 	ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb);
 	if (ret) {
 		ath10k_warn(ar, "failed to submit htc command: %d\n", ret);
-		goto err_free_resp_id;
+		goto err_free_skb;
 	}
 
 	return 0;
 
-err_free_resp_id:
-	(void)resp_id; /* TODO: free resp_id */
-
 err_free_skb:
 	dev_kfree_skb_any(skb);
 
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0dc16de1d9fb..ee94c83b5128 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3629,11 +3629,25 @@  void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work)
 static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw,
 				   struct ieee80211_txq *txq)
 {
-	return 1; /* TBD */
+	struct ath10k *ar = hw->priv;
+	struct ath10k_txq *artxq = (void *)txq->drv_priv;
+
+	/* No need to get locks */
+
+	if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH)
+		return true;
+
+	if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed)
+		return true;
+
+	if (artxq->num_fw_queued < artxq->num_push_allowed)
+		return true;
+
+	return false;
 }
 
-static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
-				  struct ieee80211_txq *txq)
+int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
+			   struct ieee80211_txq *txq)
 {
 	const bool is_mgmt = false;
 	const bool is_presp = false;
@@ -3645,6 +3659,7 @@  static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
 	enum ath10k_hw_txrx_mode txmode;
 	enum ath10k_mac_tx_path txpath;
 	struct sk_buff *skb;
+	size_t skb_len;
 	int ret;
 
 	spin_lock_bh(&ar->htt.tx_lock);
@@ -3665,6 +3680,7 @@  static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
 
 	ath10k_mac_tx_h_fill_cb(ar, vif, txq, skb);
 
+	skb_len = skb->len;
 	txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb);
 	txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode);
 
@@ -3683,7 +3699,7 @@  static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
 	artxq->num_fw_queued++;
 	spin_unlock_bh(&ar->htt.tx_lock);
 
-	return 0;
+	return skb_len;
 }
 
 static void ath10k_mac_tx_push(struct ieee80211_hw *hw,
@@ -3693,7 +3709,7 @@  static void ath10k_mac_tx_push(struct ieee80211_hw *hw,
 	int ret;
 
 	ret = ath10k_mac_tx_push_txq(hw, txq);
-	if (unlikely(ret)) {
+	if (unlikely(ret < 0)) {
 		if (txq->sta)
 			ath10k_warn(ar, "failed to push tx to station %pM tid %hhu: %d\n",
 				    txq->sta->addr, txq->tid, ret);
@@ -3738,7 +3754,7 @@  static void ath10k_mac_tx_push_pending_txq(struct ieee80211_hw *hw,
 			break;
 
 		ret = ath10k_mac_tx_push_txq(hw, txq);
-		if (ret)
+		if (ret < 0)
 			break;
 	}
 
@@ -3820,6 +3836,26 @@  static void ath10k_mac_txq_unref(struct ath10k *ar,
 	spin_unlock_bh(&ar->htt.tx_lock);
 }
 
+struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
+					    u16 peer_id,
+					    u8 tid)
+{
+	struct ath10k_peer *peer;
+
+	lockdep_assert_held(&ar->data_lock);
+
+	peer = ar->peer_map[peer_id];
+	if (!peer)
+		return NULL;
+
+	if (peer->sta)
+		return peer->sta->txq[tid];
+	else if (peer->vif)
+		return peer->vif->txq;
+	else
+		return NULL;
+}
+
 /************/
 /* Scanning */
 /************/
diff --git a/drivers/net/wireless/ath/ath10k/mac.h b/drivers/net/wireless/ath/ath10k/mac.h
index 453f606a250e..2c3327beb445 100644
--- a/drivers/net/wireless/ath/ath10k/mac.h
+++ b/drivers/net/wireless/ath/ath10k/mac.h
@@ -76,6 +76,11 @@  void ath10k_mac_vif_tx_lock(struct ath10k_vif *arvif, int reason);
 void ath10k_mac_vif_tx_unlock(struct ath10k_vif *arvif, int reason);
 bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar);
 void ath10k_mac_tx_push_pending(struct ath10k *ar);
+int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
+			   struct ieee80211_txq *txq);
+struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
+					    u16 peer_id,
+					    u8 tid);
 
 static inline struct ath10k_vif *ath10k_vif_to_arvif(struct ieee80211_vif *vif)
 {