Message ID | 20160627055128.7005-1-rmanohar@qti.qualcomm.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Kalle Valo |
Headers | show |
broken patch CC [M] /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.o /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.c: In function 'ath10k_htt_tx_free': /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.c:391:19: error: 'struct ath10k_htt' has no member named 'txrx_compl_task' Sebastian Am 27.06.2016 um 07:51 schrieb Rajkumar Manoharan: > Add NAPI support for rx and tx completion. NAPI poll is scheduled > from interrupt handler. The design is as below > > - on interrupt > - schedule napi and mask interrupts > - on poll > - process all pipes (no actual Tx/Rx) > - process Rx within budget > - if quota exceeds budget reschedule napi poll by returning budget > - process Tx completions and update budget if necessary > - process Tx fetch indications (pull-push) > - push any other pending Tx (if possible) > - before resched or napi completion replenish htt rx ring buffer > - if work done < budget, complete napi poll and unmask interrupts > > This change also get rid of two tasklets (intr_tq and txrx_compl_task). > > Measured peak throughput with NAPI on IPQ4019 platform in controlled > environment. No noticeable reduction in throughput is seen and also > observed improvements in CPU usage. Approx. 15% CPU usage got reduced > in UDP uplink case. > > DL: AP DUT Tx > UL: AP DUT Rx > > IPQ4019 (avg. cpu usage %) > ======== > TOT +NAPI > =========== ============= > TCP DL 644 Mbps (42%) 645 Mbps (36%) > TCP UL 673 Mbps (30%) 675 Mbps (26%) > UDP DL 682 Mbps (49%) 680 Mbps (49%) > UDP UL 720 Mbps (28%) 717 Mbps (11%) > > Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> > --- > drivers/net/wireless/ath/ath10k/ahb.c | 4 +- > drivers/net/wireless/ath/ath10k/core.c | 2 + > drivers/net/wireless/ath/ath10k/core.h | 8 ++ > drivers/net/wireless/ath/ath10k/htt.h | 2 +- > drivers/net/wireless/ath/ath10k/htt_rx.c | 154 +++++++++++++++++++------------ > drivers/net/wireless/ath/ath10k/pci.c | 67 +++++++++----- > drivers/net/wireless/ath/ath10k/pci.h | 5 +- > 7 files changed, 149 insertions(+), 93 deletions(-) > > diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c > index acec16b9cf49..9d4d5022be18 100644 > --- a/drivers/net/wireless/ath/ath10k/ahb.c > +++ b/drivers/net/wireless/ath/ath10k/ahb.c > @@ -462,13 +462,11 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar) > static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) > { > struct ath10k *ar = arg; > - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); > > if (!ath10k_pci_irq_pending(ar)) > return IRQ_NONE; > > ath10k_pci_disable_and_clear_legacy_irq(ar); > - tasklet_schedule(&ar_pci->intr_tq); > > return IRQ_HANDLED; > } > @@ -831,7 +829,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) > goto err_resource_deinit; > } > > - ath10k_pci_init_irq_tasklets(ar); > + ath10k_pci_init_napi(ar); > > ret = ath10k_ahb_request_irq_legacy(ar); > if (ret) > diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c > index dfb3db0ee5d1..2639e7e0836b 100644 > --- a/drivers/net/wireless/ath/ath10k/core.c > +++ b/drivers/net/wireless/ath/ath10k/core.c > @@ -2223,6 +2223,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, > INIT_WORK(&ar->register_work, ath10k_core_register_work); > INIT_WORK(&ar->restart_work, ath10k_core_restart); > > + init_dummy_netdev(&ar->napi_dev); > + > ret = ath10k_debug_create(ar); > if (ret) > goto err_free_aux_wq; > diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h > index 3da18c9dbd7a..10878926b629 100644 > --- a/drivers/net/wireless/ath/ath10k/core.h > +++ b/drivers/net/wireless/ath/ath10k/core.h > @@ -65,6 +65,10 @@ > #define ATH10K_KEEPALIVE_MAX_IDLE 3895 > #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 > > +/* NAPI poll budget */ > +#define ATH10K_NAPI_BUDGET 64 > +#define ATH10K_NAPI_QUOTA_LIMIT 60 > + > struct ath10k; > > enum ath10k_bus { > @@ -926,6 +930,10 @@ struct ath10k { > struct ath10k_thermal thermal; > struct ath10k_wow wow; > > + /* NAPI */ > + struct net_device napi_dev; > + struct napi_struct napi; > + > /* must be last */ > u8 drv_priv[0] __aligned(sizeof(void *)); > }; > diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h > index 430a83e142aa..98c14247021b 100644 > --- a/drivers/net/wireless/ath/ath10k/htt.h > +++ b/drivers/net/wireless/ath/ath10k/htt.h > @@ -1665,7 +1665,6 @@ struct ath10k_htt { > > /* This is used to group tx/rx completions separately and process them > * in batches to reduce cache stalls */ > - struct tasklet_struct txrx_compl_task; > struct sk_buff_head rx_compl_q; > struct sk_buff_head rx_in_ord_compl_q; > struct sk_buff_head tx_fetch_ind_q; > @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, > struct sk_buff *msdu); > void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, > struct sk_buff *skb); > +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); > > #endif > diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c > index 80e645302b54..c68b39bfc7bb 100644 > --- a/drivers/net/wireless/ath/ath10k/htt_rx.c > +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c > @@ -34,7 +34,6 @@ > #define HTT_RX_RING_REFILL_RESCHED_MS 5 > > static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); > -static void ath10k_htt_txrx_compl_task(unsigned long ptr); > > static struct sk_buff * > ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) > @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) > void ath10k_htt_rx_free(struct ath10k_htt *htt) > { > del_timer_sync(&htt->rx_ring.refill_retry_timer); > - tasklet_kill(&htt->txrx_compl_task); > > skb_queue_purge(&htt->rx_compl_q); > skb_queue_purge(&htt->rx_in_ord_compl_q); > @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) > skb_queue_head_init(&htt->tx_fetch_ind_q); > atomic_set(&htt->num_mpdus_ready, 0); > > - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, > - (unsigned long)htt); > - > ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", > htt->rx_ring.size, htt->rx_ring.fill_level); > return 0; > @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, > trace_ath10k_rx_hdr(ar, skb->data, skb->len); > trace_ath10k_rx_payload(ar, skb->data, skb->len); > > - ieee80211_rx(ar->hw, skb); > + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); > } > > static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, > @@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) > struct ath10k *ar = htt->ar; > static struct ieee80211_rx_status rx_status; > struct sk_buff_head amsdu; > - int ret; > + int ret, num_msdus; > > __skb_queue_head_init(&amsdu); > > @@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) > return ret; > } > > + num_msdus = skb_queue_len(&amsdu); > ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); > ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); > ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); > ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); > ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); > > - return 0; > + return num_msdus; > } > > static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, > @@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, > mpdu_count += mpdu_ranges[i].mpdu_count; > > atomic_add(mpdu_count, &htt->num_mpdus_ready); > - > - tasklet_schedule(&htt->txrx_compl_task); > -} > - > -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) > -{ > - atomic_inc(&htt->num_mpdus_ready); > - > - tasklet_schedule(&htt->txrx_compl_task); > } > > static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, > @@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, > RX_FLAG_MMIC_STRIPPED; > } > > -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, > - struct sk_buff_head *list) > +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, > + struct sk_buff_head *list) > { > struct ath10k_htt *htt = &ar->htt; > struct ieee80211_rx_status *status = &htt->rx_status; > struct htt_rx_offload_msdu *rx; > struct sk_buff *msdu; > size_t offset; > + int num_msdu = 0; > > while ((msdu = __skb_dequeue(list))) { > /* Offloaded frames don't have Rx descriptor. Instead they have > @@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, > ath10k_htt_rx_h_rx_offload_prot(status, msdu); > ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); > ath10k_process_rx(ar, status, msdu); > + num_msdu++; > } > + return num_msdu; > } > > -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > { > struct ath10k_htt *htt = &ar->htt; > struct htt_resp *resp = (void *)skb->data; > @@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > u8 tid; > bool offload; > bool frag; > - int ret; > + int ret, num_msdus = 0; > > lockdep_assert_held(&htt->rx_ring.lock); > > if (htt->rx_confused) > - return; > + return -EIO; > > skb_pull(skb, sizeof(resp->hdr)); > skb_pull(skb, sizeof(resp->rx_in_ord_ind)); > @@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > > if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { > ath10k_warn(ar, "dropping invalid in order rx indication\n"); > - return; > + return -EINVAL; > } > > /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later > @@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > if (ret < 0) { > ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); > htt->rx_confused = true; > - return; > + return -EIO; > } > > /* Offloaded frames are very different and need to be handled > * separately. > */ > if (offload) > - ath10k_htt_rx_h_rx_offload(ar, &list); > + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); > > while (!skb_queue_empty(&list)) { > __skb_queue_head_init(&amsdu); > @@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > * better to report something than nothing though. This > * should still give an idea about rx rate to the user. > */ > + num_msdus += skb_queue_len(&amsdu); > ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); > ath10k_htt_rx_h_filter(ar, &amsdu, status); > ath10k_htt_rx_h_mpdu(ar, &amsdu, status); > @@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) > ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); > htt->rx_confused = true; > __skb_queue_purge(&list); > - return; > + return -EIO; > } > } > + return num_msdus; > } > > static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, > @@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) > } > case HTT_T2H_MSG_TYPE_TX_COMPL_IND: > ath10k_htt_rx_tx_compl_ind(htt->ar, skb); > - tasklet_schedule(&htt->txrx_compl_task); > break; > case HTT_T2H_MSG_TYPE_SEC_IND: { > struct ath10k *ar = htt->ar; > @@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) > case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { > ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", > skb->data, skb->len); > - ath10k_htt_rx_frag_handler(htt); > + atomic_inc(&htt->num_mpdus_ready); > break; > } > case HTT_T2H_MSG_TYPE_TEST: > @@ -2322,8 +2313,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) > break; > } > case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { > - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); > - tasklet_schedule(&htt->txrx_compl_task); > + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); > return false; > } > case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: > @@ -2349,7 +2339,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) > break; > } > skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); > - tasklet_schedule(&htt->txrx_compl_task); > break; > } > case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: > @@ -2378,27 +2367,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, > } > EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); > > -static void ath10k_htt_txrx_compl_task(unsigned long ptr) > +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) > { > - struct ath10k_htt *htt = (struct ath10k_htt *)ptr; > - struct ath10k *ar = htt->ar; > + struct ath10k_htt *htt = &ar->htt; > struct htt_tx_done tx_done = {}; > - struct sk_buff_head rx_ind_q; > struct sk_buff_head tx_ind_q; > struct sk_buff *skb; > unsigned long flags; > - int num_mpdus; > + int quota = 0, done, num_rx_msdus; > + bool resched_napi = false; > > - __skb_queue_head_init(&rx_ind_q); > __skb_queue_head_init(&tx_ind_q); > > - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); > - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); > - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); > + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, > + * process it first to utilize full available quota. > + */ > + while (quota < budget) { > + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) > + break; > > - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); > - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); > - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); > + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); > + if (!skb) { > + resched_napi = true; > + goto exit; > + } > + > + spin_lock_bh(&htt->rx_ring.lock); > + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); > + spin_unlock_bh(&htt->rx_ring.lock); > + if (num_rx_msdus < 0) { > + resched_napi = true; > + goto exit; > + } > + > + dev_kfree_skb_any(skb); > + if (num_rx_msdus > 0) > + quota += num_rx_msdus; > + > + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && > + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { > + resched_napi = true; > + goto exit; > + } > + } > + > + while (quota < budget) { > + /* no more data to receive */ > + if (!atomic_read(&htt->num_mpdus_ready)) > + break; > + > + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); > + if (num_rx_msdus < 0) { > + resched_napi = true; > + goto exit; > + } > + > + quota += num_rx_msdus; > + atomic_dec(&htt->num_mpdus_ready); > + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && > + atomic_read(&htt->num_mpdus_ready)) { > + resched_napi = true; > + goto exit; > + } > + } > + > + /* From NAPI documentation: > + * The napi poll() function may also process TX completions, in which > + * case if it processes the entire TX ring then it should count that > + * work as the rest of the budget. > + */ > + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) > + quota = budget; > > /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. > * From kfifo_get() documentation: > @@ -2408,27 +2447,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr) > while (kfifo_get(&htt->txdone_fifo, &tx_done)) > ath10k_txrx_tx_unref(htt, &tx_done); > > + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); > + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); > + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); > + > while ((skb = __skb_dequeue(&tx_ind_q))) { > ath10k_htt_rx_tx_fetch_ind(ar, skb); > dev_kfree_skb_any(skb); > } > > - num_mpdus = atomic_read(&htt->num_mpdus_ready); > - > - while (num_mpdus) { > - if (ath10k_htt_rx_handle_amsdu(htt)) > - break; > - > - num_mpdus--; > - atomic_dec(&htt->num_mpdus_ready); > - } > - > - while ((skb = __skb_dequeue(&rx_ind_q))) { > - spin_lock_bh(&htt->rx_ring.lock); > - ath10k_htt_rx_in_ord_ind(ar, skb); > - spin_unlock_bh(&htt->rx_ring.lock); > - dev_kfree_skb_any(skb); > - } > - > +exit: > ath10k_htt_rx_msdu_buff_replenish(htt); > + /* In case of rx failure or more data to read, report budget > + * to reschedule NAPI poll > + */ > + done = resched_napi ? budget : quota; > + > + return done; > } > +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); > diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c > index f06dd3941bac..9169ab92a16a 100644 > --- a/drivers/net/wireless/ath/ath10k/pci.c > +++ b/drivers/net/wireless/ath/ath10k/pci.c > @@ -1502,12 +1502,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, > ath10k_ce_per_engine_service(ar, pipe); > } > > -void ath10k_pci_kill_tasklet(struct ath10k *ar) > +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) > { > struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); > > - tasklet_kill(&ar_pci->intr_tq); > - > del_timer_sync(&ar_pci->rx_post_retry); > } > > @@ -1747,7 +1745,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) > > void ath10k_pci_flush(struct ath10k *ar) > { > - ath10k_pci_kill_tasklet(ar); > + ath10k_pci_rx_retry_sync(ar); > ath10k_pci_buffer_cleanup(ar); > } > > @@ -2754,35 +2752,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg) > return IRQ_NONE; > } > > - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { > - if (!ath10k_pci_irq_pending(ar)) > - return IRQ_NONE; > - > - ath10k_pci_disable_and_clear_legacy_irq(ar); > - } > + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && > + !ath10k_pci_irq_pending(ar)) > + return IRQ_NONE; > > - tasklet_schedule(&ar_pci->intr_tq); > + ath10k_pci_disable_and_clear_legacy_irq(ar); > + ath10k_pci_irq_msi_fw_mask(ar); > + napi_schedule(&ar->napi); > > return IRQ_HANDLED; > } > > -static void ath10k_pci_tasklet(unsigned long data) > +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) > { > - struct ath10k *ar = (struct ath10k *)data; > - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); > + struct ath10k *ar = container_of(ctx, struct ath10k, napi); > + int done = 0; > > if (ath10k_pci_has_fw_crashed(ar)) { > - ath10k_pci_irq_disable(ar); > ath10k_pci_fw_crashed_clear(ar); > ath10k_pci_fw_crashed_dump(ar); > - return; > + napi_complete(ctx); > + return done; > } > > ath10k_ce_per_engine_service_any(ar); > > - /* Re-enable legacy irq that was disabled in the irq handler */ > - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) > + done = ath10k_htt_txrx_compl_task(ar, budget); > + > + if (done < budget) { > + napi_complete(ctx); > + /* In case of MSI, it is possible that interrupts are received > + * while NAPI poll is inprogress. So pending interrupts that are > + * received after processing all copy engine pipes by NAPI poll > + * will not be handled again. This is causing failure to > + * complete boot sequence in x86 platform. So before enabling > + * interrupts safer to check for pending interrupts for > + * immediate servicing. > + */ > + if (CE_INTERRUPT_SUMMARY(ar)) { > + napi_reschedule(&ar->napi); > + goto out; > + } > ath10k_pci_enable_legacy_irq(ar); > + ath10k_pci_irq_msi_fw_unmask(ar); > + } > + > +out: > + return done; > } > > static int ath10k_pci_request_irq_msi(struct ath10k *ar) > @@ -2840,11 +2856,11 @@ static void ath10k_pci_free_irq(struct ath10k *ar) > free_irq(ar_pci->pdev->irq, ar); > } > > -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) > +void ath10k_pci_init_napi(struct ath10k *ar) > { > - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); > - > - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); > + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, > + ATH10K_NAPI_BUDGET); > + napi_enable(&ar->napi); > } > > static int ath10k_pci_init_irq(struct ath10k *ar) > @@ -2852,7 +2868,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) > struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); > int ret; > > - ath10k_pci_init_irq_tasklets(ar); > + ath10k_pci_init_napi(ar); > > if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) > ath10k_info(ar, "limiting irq mode to: %d\n", > @@ -3113,7 +3129,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) > > void ath10k_pci_release_resource(struct ath10k *ar) > { > - ath10k_pci_kill_tasklet(ar); > + ath10k_pci_rx_retry_sync(ar); > + netif_napi_del(&ar->napi); > ath10k_pci_ce_deinit(ar); > ath10k_pci_free_pipes(ar); > } > @@ -3274,7 +3291,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, > > err_free_irq: > ath10k_pci_free_irq(ar); > - ath10k_pci_kill_tasklet(ar); > + ath10k_pci_rx_retry_sync(ar); > > err_deinit_irq: > ath10k_pci_deinit_irq(ar); > diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h > index 6eca1df2ce60..974e463e8b95 100644 > --- a/drivers/net/wireless/ath/ath10k/pci.h > +++ b/drivers/net/wireless/ath/ath10k/pci.h > @@ -177,8 +177,6 @@ struct ath10k_pci { > /* Operating interrupt mode */ > enum ath10k_pci_irq_mode oper_irq_mode; > > - struct tasklet_struct intr_tq; > - > struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; > > /* Copy Engine used for Diagnostic Accesses */ > @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar); > void ath10k_pci_free_pipes(struct ath10k *ar); > void ath10k_pci_rx_replenish_retry(unsigned long ptr); > void ath10k_pci_ce_deinit(struct ath10k *ar); > -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); > -void ath10k_pci_kill_tasklet(struct ath10k *ar); > +void ath10k_pci_init_napi(struct ath10k *ar); > int ath10k_pci_init_pipes(struct ath10k *ar); > int ath10k_pci_init_config(struct ath10k *ar); > void ath10k_pci_rx_post(struct ath10k *ar);
forget about it. it was caused by another patch here on this list. Am 28.06.2016 um 00:45 schrieb Sebastian Gottschall: > broken patch > > CC [M] > /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.o > /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.c: > In function 'ath10k_htt_tx_free': > /home/seg/DEV/pb42/src/router/private/compat-wireless-2016-05-12/drivers/net/wireless/ath/ath10k/htt_tx.c:391:19: > error: 'struct ath10k_htt' has no member named 'txrx_compl_task' > > Sebastian > > Am 27.06.2016 um 07:51 schrieb Rajkumar Manoharan: >> Add NAPI support for rx and tx completion. NAPI poll is scheduled >> from interrupt handler. The design is as below >> >> - on interrupt >> - schedule napi and mask interrupts >> - on poll >> - process all pipes (no actual Tx/Rx) >> - process Rx within budget >> - if quota exceeds budget reschedule napi poll by returning budget >> - process Tx completions and update budget if necessary >> - process Tx fetch indications (pull-push) >> - push any other pending Tx (if possible) >> - before resched or napi completion replenish htt rx ring buffer >> - if work done < budget, complete napi poll and unmask interrupts >> >> This change also get rid of two tasklets (intr_tq and txrx_compl_task). >> >> Measured peak throughput with NAPI on IPQ4019 platform in controlled >> environment. No noticeable reduction in throughput is seen and also >> observed improvements in CPU usage. Approx. 15% CPU usage got reduced >> in UDP uplink case. >> >> DL: AP DUT Tx >> UL: AP DUT Rx >> >> IPQ4019 (avg. cpu usage %) >> ======== >> TOT +NAPI >> =========== ============= >> TCP DL 644 Mbps (42%) 645 Mbps (36%) >> TCP UL 673 Mbps (30%) 675 Mbps (26%) >> UDP DL 682 Mbps (49%) 680 Mbps (49%) >> UDP UL 720 Mbps (28%) 717 Mbps (11%) >> >> Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> >> --- >> drivers/net/wireless/ath/ath10k/ahb.c | 4 +- >> drivers/net/wireless/ath/ath10k/core.c | 2 + >> drivers/net/wireless/ath/ath10k/core.h | 8 ++ >> drivers/net/wireless/ath/ath10k/htt.h | 2 +- >> drivers/net/wireless/ath/ath10k/htt_rx.c | 154 >> +++++++++++++++++++------------ >> drivers/net/wireless/ath/ath10k/pci.c | 67 +++++++++----- >> drivers/net/wireless/ath/ath10k/pci.h | 5 +- >> 7 files changed, 149 insertions(+), 93 deletions(-) >> >> diff --git a/drivers/net/wireless/ath/ath10k/ahb.c >> b/drivers/net/wireless/ath/ath10k/ahb.c >> index acec16b9cf49..9d4d5022be18 100644 >> --- a/drivers/net/wireless/ath/ath10k/ahb.c >> +++ b/drivers/net/wireless/ath/ath10k/ahb.c >> @@ -462,13 +462,11 @@ static void ath10k_ahb_halt_chip(struct ath10k >> *ar) >> static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) >> { >> struct ath10k *ar = arg; >> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); >> if (!ath10k_pci_irq_pending(ar)) >> return IRQ_NONE; >> ath10k_pci_disable_and_clear_legacy_irq(ar); >> - tasklet_schedule(&ar_pci->intr_tq); >> return IRQ_HANDLED; >> } >> @@ -831,7 +829,7 @@ static int ath10k_ahb_probe(struct >> platform_device *pdev) >> goto err_resource_deinit; >> } >> - ath10k_pci_init_irq_tasklets(ar); >> + ath10k_pci_init_napi(ar); >> ret = ath10k_ahb_request_irq_legacy(ar); >> if (ret) >> diff --git a/drivers/net/wireless/ath/ath10k/core.c >> b/drivers/net/wireless/ath/ath10k/core.c >> index dfb3db0ee5d1..2639e7e0836b 100644 >> --- a/drivers/net/wireless/ath/ath10k/core.c >> +++ b/drivers/net/wireless/ath/ath10k/core.c >> @@ -2223,6 +2223,8 @@ struct ath10k *ath10k_core_create(size_t >> priv_size, struct device *dev, >> INIT_WORK(&ar->register_work, ath10k_core_register_work); >> INIT_WORK(&ar->restart_work, ath10k_core_restart); >> + init_dummy_netdev(&ar->napi_dev); >> + >> ret = ath10k_debug_create(ar); >> if (ret) >> goto err_free_aux_wq; >> diff --git a/drivers/net/wireless/ath/ath10k/core.h >> b/drivers/net/wireless/ath/ath10k/core.h >> index 3da18c9dbd7a..10878926b629 100644 >> --- a/drivers/net/wireless/ath/ath10k/core.h >> +++ b/drivers/net/wireless/ath/ath10k/core.h >> @@ -65,6 +65,10 @@ >> #define ATH10K_KEEPALIVE_MAX_IDLE 3895 >> #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 >> +/* NAPI poll budget */ >> +#define ATH10K_NAPI_BUDGET 64 >> +#define ATH10K_NAPI_QUOTA_LIMIT 60 >> + >> struct ath10k; >> enum ath10k_bus { >> @@ -926,6 +930,10 @@ struct ath10k { >> struct ath10k_thermal thermal; >> struct ath10k_wow wow; >> + /* NAPI */ >> + struct net_device napi_dev; >> + struct napi_struct napi; >> + >> /* must be last */ >> u8 drv_priv[0] __aligned(sizeof(void *)); >> }; >> diff --git a/drivers/net/wireless/ath/ath10k/htt.h >> b/drivers/net/wireless/ath/ath10k/htt.h >> index 430a83e142aa..98c14247021b 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt.h >> +++ b/drivers/net/wireless/ath/ath10k/htt.h >> @@ -1665,7 +1665,6 @@ struct ath10k_htt { >> /* This is used to group tx/rx completions separately and >> process them >> * in batches to reduce cache stalls */ >> - struct tasklet_struct txrx_compl_task; >> struct sk_buff_head rx_compl_q; >> struct sk_buff_head rx_in_ord_compl_q; >> struct sk_buff_head tx_fetch_ind_q; >> @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >> struct sk_buff *msdu); >> void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, >> struct sk_buff *skb); >> +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); >> #endif >> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c >> b/drivers/net/wireless/ath/ath10k/htt_rx.c >> index 80e645302b54..c68b39bfc7bb 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c >> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c >> @@ -34,7 +34,6 @@ >> #define HTT_RX_RING_REFILL_RESCHED_MS 5 >> static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); >> -static void ath10k_htt_txrx_compl_task(unsigned long ptr); >> static struct sk_buff * >> ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) >> @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) >> void ath10k_htt_rx_free(struct ath10k_htt *htt) >> { >> del_timer_sync(&htt->rx_ring.refill_retry_timer); >> - tasklet_kill(&htt->txrx_compl_task); >> skb_queue_purge(&htt->rx_compl_q); >> skb_queue_purge(&htt->rx_in_ord_compl_q); >> @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) >> skb_queue_head_init(&htt->tx_fetch_ind_q); >> atomic_set(&htt->num_mpdus_ready, 0); >> - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, >> - (unsigned long)htt); >> - >> ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level >> %d\n", >> htt->rx_ring.size, htt->rx_ring.fill_level); >> return 0; >> @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, >> trace_ath10k_rx_hdr(ar, skb->data, skb->len); >> trace_ath10k_rx_payload(ar, skb->data, skb->len); >> - ieee80211_rx(ar->hw, skb); >> + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); >> } >> static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, >> @@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct >> ath10k_htt *htt) >> struct ath10k *ar = htt->ar; >> static struct ieee80211_rx_status rx_status; >> struct sk_buff_head amsdu; >> - int ret; >> + int ret, num_msdus; >> __skb_queue_head_init(&amsdu); >> @@ -1549,13 +1544,14 @@ static int >> ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) >> return ret; >> } >> + num_msdus = skb_queue_len(&amsdu); >> ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); >> ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); >> ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); >> ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); >> ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); >> - return 0; >> + return num_msdus; >> } >> static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, >> @@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct >> ath10k_htt *htt, >> mpdu_count += mpdu_ranges[i].mpdu_count; >> atomic_add(mpdu_count, &htt->num_mpdus_ready); >> - >> - tasklet_schedule(&htt->txrx_compl_task); >> -} >> - >> -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) >> -{ >> - atomic_inc(&htt->num_mpdus_ready); >> - >> - tasklet_schedule(&htt->txrx_compl_task); >> } >> static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, >> @@ -1772,14 +1759,15 @@ static void >> ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, >> RX_FLAG_MMIC_STRIPPED; >> } >> -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, >> - struct sk_buff_head *list) >> +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, >> + struct sk_buff_head *list) >> { >> struct ath10k_htt *htt = &ar->htt; >> struct ieee80211_rx_status *status = &htt->rx_status; >> struct htt_rx_offload_msdu *rx; >> struct sk_buff *msdu; >> size_t offset; >> + int num_msdu = 0; >> while ((msdu = __skb_dequeue(list))) { >> /* Offloaded frames don't have Rx descriptor. Instead they >> have >> @@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct >> ath10k *ar, >> ath10k_htt_rx_h_rx_offload_prot(status, msdu); >> ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); >> ath10k_process_rx(ar, status, msdu); >> + num_msdu++; >> } >> + return num_msdu; >> } >> -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct >> sk_buff *skb) >> +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct >> sk_buff *skb) >> { >> struct ath10k_htt *htt = &ar->htt; >> struct htt_resp *resp = (void *)skb->data; >> @@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct >> ath10k *ar, struct sk_buff *skb) >> u8 tid; >> bool offload; >> bool frag; >> - int ret; >> + int ret, num_msdus = 0; >> lockdep_assert_held(&htt->rx_ring.lock); >> if (htt->rx_confused) >> - return; >> + return -EIO; >> skb_pull(skb, sizeof(resp->hdr)); >> skb_pull(skb, sizeof(resp->rx_in_ord_ind)); >> @@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct >> ath10k *ar, struct sk_buff *skb) >> if (skb->len < msdu_count * >> sizeof(*resp->rx_in_ord_ind.msdu_descs)) { >> ath10k_warn(ar, "dropping invalid in order rx indication\n"); >> - return; >> + return -EINVAL; >> } >> /* The event can deliver more than 1 A-MSDU. Each A-MSDU is >> later >> @@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct >> ath10k *ar, struct sk_buff *skb) >> if (ret < 0) { >> ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); >> htt->rx_confused = true; >> - return; >> + return -EIO; >> } >> /* Offloaded frames are very different and need to be handled >> * separately. >> */ >> if (offload) >> - ath10k_htt_rx_h_rx_offload(ar, &list); >> + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); >> while (!skb_queue_empty(&list)) { >> __skb_queue_head_init(&amsdu); >> @@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct >> ath10k *ar, struct sk_buff *skb) >> * better to report something than nothing though. This >> * should still give an idea about rx rate to the user. >> */ >> + num_msdus += skb_queue_len(&amsdu); >> ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); >> ath10k_htt_rx_h_filter(ar, &amsdu, status); >> ath10k_htt_rx_h_mpdu(ar, &amsdu, status); >> @@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct >> ath10k *ar, struct sk_buff *skb) >> ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); >> htt->rx_confused = true; >> __skb_queue_purge(&list); >> - return; >> + return -EIO; >> } >> } >> + return num_msdus; >> } >> static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k >> *ar, >> @@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k >> *ar, struct sk_buff *skb) >> } >> case HTT_T2H_MSG_TYPE_TX_COMPL_IND: >> ath10k_htt_rx_tx_compl_ind(htt->ar, skb); >> - tasklet_schedule(&htt->txrx_compl_task); >> break; >> case HTT_T2H_MSG_TYPE_SEC_IND: { >> struct ath10k *ar = htt->ar; >> @@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k >> *ar, struct sk_buff *skb) >> case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { >> ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", >> skb->data, skb->len); >> - ath10k_htt_rx_frag_handler(htt); >> + atomic_inc(&htt->num_mpdus_ready); >> break; >> } >> case HTT_T2H_MSG_TYPE_TEST: >> @@ -2322,8 +2313,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k >> *ar, struct sk_buff *skb) >> break; >> } >> case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { >> - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); >> - tasklet_schedule(&htt->txrx_compl_task); >> + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); >> return false; >> } >> case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: >> @@ -2349,7 +2339,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k >> *ar, struct sk_buff *skb) >> break; >> } >> skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); >> - tasklet_schedule(&htt->txrx_compl_task); >> break; >> } >> case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: >> @@ -2378,27 +2367,77 @@ void >> ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, >> } >> EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); >> -static void ath10k_htt_txrx_compl_task(unsigned long ptr) >> +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) >> { >> - struct ath10k_htt *htt = (struct ath10k_htt *)ptr; >> - struct ath10k *ar = htt->ar; >> + struct ath10k_htt *htt = &ar->htt; >> struct htt_tx_done tx_done = {}; >> - struct sk_buff_head rx_ind_q; >> struct sk_buff_head tx_ind_q; >> struct sk_buff *skb; >> unsigned long flags; >> - int num_mpdus; >> + int quota = 0, done, num_rx_msdus; >> + bool resched_napi = false; >> - __skb_queue_head_init(&rx_ind_q); >> __skb_queue_head_init(&tx_ind_q); >> - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); >> - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); >> - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); >> + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, >> + * process it first to utilize full available quota. >> + */ >> + while (quota < budget) { >> + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) >> + break; >> - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); >> - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); >> - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); >> + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); >> + if (!skb) { >> + resched_napi = true; >> + goto exit; >> + } >> + >> + spin_lock_bh(&htt->rx_ring.lock); >> + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); >> + spin_unlock_bh(&htt->rx_ring.lock); >> + if (num_rx_msdus < 0) { >> + resched_napi = true; >> + goto exit; >> + } >> + >> + dev_kfree_skb_any(skb); >> + if (num_rx_msdus > 0) >> + quota += num_rx_msdus; >> + >> + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && >> + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { >> + resched_napi = true; >> + goto exit; >> + } >> + } >> + >> + while (quota < budget) { >> + /* no more data to receive */ >> + if (!atomic_read(&htt->num_mpdus_ready)) >> + break; >> + >> + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); >> + if (num_rx_msdus < 0) { >> + resched_napi = true; >> + goto exit; >> + } >> + >> + quota += num_rx_msdus; >> + atomic_dec(&htt->num_mpdus_ready); >> + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && >> + atomic_read(&htt->num_mpdus_ready)) { >> + resched_napi = true; >> + goto exit; >> + } >> + } >> + >> + /* From NAPI documentation: >> + * The napi poll() function may also process TX completions, in >> which >> + * case if it processes the entire TX ring then it should count >> that >> + * work as the rest of the budget. >> + */ >> + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) >> + quota = budget; >> /* kfifo_get: called only within txrx_tasklet so it's neatly >> serialized. >> * From kfifo_get() documentation: >> @@ -2408,27 +2447,22 @@ static void >> ath10k_htt_txrx_compl_task(unsigned long ptr) >> while (kfifo_get(&htt->txdone_fifo, &tx_done)) >> ath10k_txrx_tx_unref(htt, &tx_done); >> + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); >> + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); >> + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); >> + >> while ((skb = __skb_dequeue(&tx_ind_q))) { >> ath10k_htt_rx_tx_fetch_ind(ar, skb); >> dev_kfree_skb_any(skb); >> } >> - num_mpdus = atomic_read(&htt->num_mpdus_ready); >> - >> - while (num_mpdus) { >> - if (ath10k_htt_rx_handle_amsdu(htt)) >> - break; >> - >> - num_mpdus--; >> - atomic_dec(&htt->num_mpdus_ready); >> - } >> - >> - while ((skb = __skb_dequeue(&rx_ind_q))) { >> - spin_lock_bh(&htt->rx_ring.lock); >> - ath10k_htt_rx_in_ord_ind(ar, skb); >> - spin_unlock_bh(&htt->rx_ring.lock); >> - dev_kfree_skb_any(skb); >> - } >> - >> +exit: >> ath10k_htt_rx_msdu_buff_replenish(htt); >> + /* In case of rx failure or more data to read, report budget >> + * to reschedule NAPI poll >> + */ >> + done = resched_napi ? budget : quota; >> + >> + return done; >> } >> +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); >> diff --git a/drivers/net/wireless/ath/ath10k/pci.c >> b/drivers/net/wireless/ath/ath10k/pci.c >> index f06dd3941bac..9169ab92a16a 100644 >> --- a/drivers/net/wireless/ath/ath10k/pci.c >> +++ b/drivers/net/wireless/ath/ath10k/pci.c >> @@ -1502,12 +1502,10 @@ void >> ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, >> ath10k_ce_per_engine_service(ar, pipe); >> } >> -void ath10k_pci_kill_tasklet(struct ath10k *ar) >> +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) >> { >> struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); >> - tasklet_kill(&ar_pci->intr_tq); >> - >> del_timer_sync(&ar_pci->rx_post_retry); >> } >> @@ -1747,7 +1745,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) >> void ath10k_pci_flush(struct ath10k *ar) >> { >> - ath10k_pci_kill_tasklet(ar); >> + ath10k_pci_rx_retry_sync(ar); >> ath10k_pci_buffer_cleanup(ar); >> } >> @@ -2754,35 +2752,53 @@ static irqreturn_t >> ath10k_pci_interrupt_handler(int irq, void *arg) >> return IRQ_NONE; >> } >> - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { >> - if (!ath10k_pci_irq_pending(ar)) >> - return IRQ_NONE; >> - >> - ath10k_pci_disable_and_clear_legacy_irq(ar); >> - } >> + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && >> + !ath10k_pci_irq_pending(ar)) >> + return IRQ_NONE; >> - tasklet_schedule(&ar_pci->intr_tq); >> + ath10k_pci_disable_and_clear_legacy_irq(ar); >> + ath10k_pci_irq_msi_fw_mask(ar); >> + napi_schedule(&ar->napi); >> return IRQ_HANDLED; >> } >> -static void ath10k_pci_tasklet(unsigned long data) >> +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) >> { >> - struct ath10k *ar = (struct ath10k *)data; >> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); >> + struct ath10k *ar = container_of(ctx, struct ath10k, napi); >> + int done = 0; >> if (ath10k_pci_has_fw_crashed(ar)) { >> - ath10k_pci_irq_disable(ar); >> ath10k_pci_fw_crashed_clear(ar); >> ath10k_pci_fw_crashed_dump(ar); >> - return; >> + napi_complete(ctx); >> + return done; >> } >> ath10k_ce_per_engine_service_any(ar); >> - /* Re-enable legacy irq that was disabled in the irq handler */ >> - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) >> + done = ath10k_htt_txrx_compl_task(ar, budget); >> + >> + if (done < budget) { >> + napi_complete(ctx); >> + /* In case of MSI, it is possible that interrupts are received >> + * while NAPI poll is inprogress. So pending interrupts that >> are >> + * received after processing all copy engine pipes by NAPI poll >> + * will not be handled again. This is causing failure to >> + * complete boot sequence in x86 platform. So before enabling >> + * interrupts safer to check for pending interrupts for >> + * immediate servicing. >> + */ >> + if (CE_INTERRUPT_SUMMARY(ar)) { >> + napi_reschedule(&ar->napi); >> + goto out; >> + } >> ath10k_pci_enable_legacy_irq(ar); >> + ath10k_pci_irq_msi_fw_unmask(ar); >> + } >> + >> +out: >> + return done; >> } >> static int ath10k_pci_request_irq_msi(struct ath10k *ar) >> @@ -2840,11 +2856,11 @@ static void ath10k_pci_free_irq(struct ath10k >> *ar) >> free_irq(ar_pci->pdev->irq, ar); >> } >> -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) >> +void ath10k_pci_init_napi(struct ath10k *ar) >> { >> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); >> - >> - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned >> long)ar); >> + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, >> + ATH10K_NAPI_BUDGET); >> + napi_enable(&ar->napi); >> } >> static int ath10k_pci_init_irq(struct ath10k *ar) >> @@ -2852,7 +2868,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) >> struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); >> int ret; >> - ath10k_pci_init_irq_tasklets(ar); >> + ath10k_pci_init_napi(ar); >> if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) >> ath10k_info(ar, "limiting irq mode to: %d\n", >> @@ -3113,7 +3129,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) >> void ath10k_pci_release_resource(struct ath10k *ar) >> { >> - ath10k_pci_kill_tasklet(ar); >> + ath10k_pci_rx_retry_sync(ar); >> + netif_napi_del(&ar->napi); >> ath10k_pci_ce_deinit(ar); >> ath10k_pci_free_pipes(ar); >> } >> @@ -3274,7 +3291,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, >> err_free_irq: >> ath10k_pci_free_irq(ar); >> - ath10k_pci_kill_tasklet(ar); >> + ath10k_pci_rx_retry_sync(ar); >> err_deinit_irq: >> ath10k_pci_deinit_irq(ar); >> diff --git a/drivers/net/wireless/ath/ath10k/pci.h >> b/drivers/net/wireless/ath/ath10k/pci.h >> index 6eca1df2ce60..974e463e8b95 100644 >> --- a/drivers/net/wireless/ath/ath10k/pci.h >> +++ b/drivers/net/wireless/ath/ath10k/pci.h >> @@ -177,8 +177,6 @@ struct ath10k_pci { >> /* Operating interrupt mode */ >> enum ath10k_pci_irq_mode oper_irq_mode; >> - struct tasklet_struct intr_tq; >> - >> struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; >> /* Copy Engine used for Diagnostic Accesses */ >> @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar); >> void ath10k_pci_free_pipes(struct ath10k *ar); >> void ath10k_pci_rx_replenish_retry(unsigned long ptr); >> void ath10k_pci_ce_deinit(struct ath10k *ar); >> -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); >> -void ath10k_pci_kill_tasklet(struct ath10k *ar); >> +void ath10k_pci_init_napi(struct ath10k *ar); >> int ath10k_pci_init_pipes(struct ath10k *ar); >> int ath10k_pci_init_config(struct ath10k *ar); >> void ath10k_pci_rx_post(struct ath10k *ar); > >
Rajkumar Manoharan <rmanohar@qti.qualcomm.com> wrote: > Add NAPI support for rx and tx completion. NAPI poll is scheduled > from interrupt handler. The design is as below > > - on interrupt > - schedule napi and mask interrupts > - on poll > - process all pipes (no actual Tx/Rx) > - process Rx within budget > - if quota exceeds budget reschedule napi poll by returning budget > - process Tx completions and update budget if necessary > - process Tx fetch indications (pull-push) > - push any other pending Tx (if possible) > - before resched or napi completion replenish htt rx ring buffer > - if work done < budget, complete napi poll and unmask interrupts > > This change also get rid of two tasklets (intr_tq and txrx_compl_task). > > Measured peak throughput with NAPI on IPQ4019 platform in controlled > environment. No noticeable reduction in throughput is seen and also > observed improvements in CPU usage. Approx. 15% CPU usage got reduced > in UDP uplink case. > > DL: AP DUT Tx > UL: AP DUT Rx > > IPQ4019 (avg. cpu usage %) > > ======== > TOT +NAPI > =========== ============= > TCP DL 644 Mbps (42%) 645 Mbps (36%) > TCP UL 673 Mbps (30%) 675 Mbps (26%) > UDP DL 682 Mbps (49%) 680 Mbps (49%) > UDP UL 720 Mbps (28%) 717 Mbps (11%) > > Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> This is quite big change and I'm planning to queue this for 4.9. So I'm not applying this quite yet.
======== TOT +NAPI =========== ============= TCP DL 644 Mbps (42%) 645 Mbps (36%) TCP UL 673 Mbps (30%) 675 Mbps (26%) UDP DL 682 Mbps (49%) 680 Mbps (49%) UDP UL 720 Mbps (28%) 717 Mbps (11%) Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> --- drivers/net/wireless/ath/ath10k/ahb.c | 4 +- drivers/net/wireless/ath/ath10k/core.c | 2 + drivers/net/wireless/ath/ath10k/core.h | 8 ++ drivers/net/wireless/ath/ath10k/htt.h | 2 +- drivers/net/wireless/ath/ath10k/htt_rx.c | 154 +++++++++++++++++++------------ drivers/net/wireless/ath/ath10k/pci.c | 67 +++++++++----- drivers/net/wireless/ath/ath10k/pci.h | 5 +- 7 files changed, 149 insertions(+), 93 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index acec16b9cf49..9d4d5022be18 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -462,13 +462,11 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar) static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) { struct ath10k *ar = arg; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); if (!ath10k_pci_irq_pending(ar)) return IRQ_NONE; ath10k_pci_disable_and_clear_legacy_irq(ar); - tasklet_schedule(&ar_pci->intr_tq); return IRQ_HANDLED; } @@ -831,7 +829,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) goto err_resource_deinit; } - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); ret = ath10k_ahb_request_irq_legacy(ar); if (ret) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index dfb3db0ee5d1..2639e7e0836b 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2223,6 +2223,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); + init_dummy_netdev(&ar->napi_dev); + ret = ath10k_debug_create(ar); if (ret) goto err_free_aux_wq; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 3da18c9dbd7a..10878926b629 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -65,6 +65,10 @@ #define ATH10K_KEEPALIVE_MAX_IDLE 3895 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 +/* NAPI poll budget */ +#define ATH10K_NAPI_BUDGET 64 +#define ATH10K_NAPI_QUOTA_LIMIT 60 + struct ath10k; enum ath10k_bus { @@ -926,6 +930,10 @@ struct ath10k { struct ath10k_thermal thermal; struct ath10k_wow wow; + /* NAPI */ + struct net_device napi_dev; + struct napi_struct napi; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 430a83e142aa..98c14247021b 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -1665,7 +1665,6 @@ struct ath10k_htt { /* This is used to group tx/rx completions separately and process them * in batches to reduce cache stalls */ - struct tasklet_struct txrx_compl_task; struct sk_buff_head rx_compl_q; struct sk_buff_head rx_in_ord_compl_q; struct sk_buff_head tx_fetch_ind_q; @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu); void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, struct sk_buff *skb); +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); #endif diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 80e645302b54..c68b39bfc7bb 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -34,7 +34,6 @@ #define HTT_RX_RING_REFILL_RESCHED_MS 5 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); -static void ath10k_htt_txrx_compl_task(unsigned long ptr); static struct sk_buff * ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) void ath10k_htt_rx_free(struct ath10k_htt *htt) { del_timer_sync(&htt->rx_ring.refill_retry_timer); - tasklet_kill(&htt->txrx_compl_task); skb_queue_purge(&htt->rx_compl_q); skb_queue_purge(&htt->rx_in_ord_compl_q); @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) skb_queue_head_init(&htt->tx_fetch_ind_q); atomic_set(&htt->num_mpdus_ready, 0); - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, - (unsigned long)htt); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", htt->rx_ring.size, htt->rx_ring.fill_level); return 0; @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, trace_ath10k_rx_hdr(ar, skb->data, skb->len); trace_ath10k_rx_payload(ar, skb->data, skb->len); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); } static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, @@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) struct ath10k *ar = htt->ar; static struct ieee80211_rx_status rx_status; struct sk_buff_head amsdu; - int ret; + int ret, num_msdus; __skb_queue_head_init(&amsdu); @@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) return ret; } + num_msdus = skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status); ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status); ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status); - return 0; + return num_msdus; } static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, @@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, mpdu_count += mpdu_ranges[i].mpdu_count; atomic_add(mpdu_count, &htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); -} - -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) -{ - atomic_inc(&htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); } static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, @@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, RX_FLAG_MMIC_STRIPPED; } -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, - struct sk_buff_head *list) +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, + struct sk_buff_head *list) { struct ath10k_htt *htt = &ar->htt; struct ieee80211_rx_status *status = &htt->rx_status; struct htt_rx_offload_msdu *rx; struct sk_buff *msdu; size_t offset; + int num_msdu = 0; while ((msdu = __skb_dequeue(list))) { /* Offloaded frames don't have Rx descriptor. Instead they have @@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, ath10k_htt_rx_h_rx_offload_prot(status, msdu); ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); ath10k_process_rx(ar, status, msdu); + num_msdu++; } + return num_msdu; } -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_resp *resp = (void *)skb->data; @@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) u8 tid; bool offload; bool frag; - int ret; + int ret, num_msdus = 0; lockdep_assert_held(&htt->rx_ring.lock); if (htt->rx_confused) - return; + return -EIO; skb_pull(skb, sizeof(resp->hdr)); skb_pull(skb, sizeof(resp->rx_in_ord_ind)); @@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { ath10k_warn(ar, "dropping invalid in order rx indication\n"); - return; + return -EINVAL; } /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later @@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (ret < 0) { ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); htt->rx_confused = true; - return; + return -EIO; } /* Offloaded frames are very different and need to be handled * separately. */ if (offload) - ath10k_htt_rx_h_rx_offload(ar, &list); + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); while (!skb_queue_empty(&list)) { __skb_queue_head_init(&amsdu); @@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) * better to report something than nothing though. This * should still give an idea about rx rate to the user. */ + num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); ath10k_htt_rx_h_mpdu(ar, &amsdu, status); @@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); htt->rx_confused = true; __skb_queue_purge(&list); - return; + return -EIO; } } + return num_msdus; } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, @@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) } case HTT_T2H_MSG_TYPE_TX_COMPL_IND: ath10k_htt_rx_tx_compl_ind(htt->ar, skb); - tasklet_schedule(&htt->txrx_compl_task); break; case HTT_T2H_MSG_TYPE_SEC_IND: { struct ath10k *ar = htt->ar; @@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", skb->data, skb->len); - ath10k_htt_rx_frag_handler(htt); + atomic_inc(&htt->num_mpdus_ready); break; } case HTT_T2H_MSG_TYPE_TEST: @@ -2322,8 +2313,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); - tasklet_schedule(&htt->txrx_compl_task); + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); return false; } case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: @@ -2349,7 +2339,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); - tasklet_schedule(&htt->txrx_compl_task); break; } case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: @@ -2378,27 +2367,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, } EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); -static void ath10k_htt_txrx_compl_task(unsigned long ptr) +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) { - struct ath10k_htt *htt = (struct ath10k_htt *)ptr; - struct ath10k *ar = htt->ar; + struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {}; - struct sk_buff_head rx_ind_q; struct sk_buff_head tx_ind_q; struct sk_buff *skb; unsigned long flags; - int num_mpdus; + int quota = 0, done, num_rx_msdus; + bool resched_napi = false; - __skb_queue_head_init(&rx_ind_q); __skb_queue_head_init(&tx_ind_q); - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, + * process it first to utilize full available quota. + */ + while (quota < budget) { + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) + break; - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); + if (!skb) { + resched_napi = true; + goto exit; + } + + spin_lock_bh(&htt->rx_ring.lock); + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); + spin_unlock_bh(&htt->rx_ring.lock); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + dev_kfree_skb_any(skb); + if (num_rx_msdus > 0) + quota += num_rx_msdus; + + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { + resched_napi = true; + goto exit; + } + } + + while (quota < budget) { + /* no more data to receive */ + if (!atomic_read(&htt->num_mpdus_ready)) + break; + + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + quota += num_rx_msdus; + atomic_dec(&htt->num_mpdus_ready); + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + atomic_read(&htt->num_mpdus_ready)) { + resched_napi = true; + goto exit; + } + } + + /* From NAPI documentation: + * The napi poll() function may also process TX completions, in which + * case if it processes the entire TX ring then it should count that + * work as the rest of the budget. + */ + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) + quota = budget; /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. * From kfifo_get() documentation: @@ -2408,27 +2447,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + while ((skb = __skb_dequeue(&tx_ind_q))) { ath10k_htt_rx_tx_fetch_ind(ar, skb); dev_kfree_skb_any(skb); } - num_mpdus = atomic_read(&htt->num_mpdus_ready); - - while (num_mpdus) { - if (ath10k_htt_rx_handle_amsdu(htt)) - break; - - num_mpdus--; - atomic_dec(&htt->num_mpdus_ready); - } - - while ((skb = __skb_dequeue(&rx_ind_q))) { - spin_lock_bh(&htt->rx_ring.lock); - ath10k_htt_rx_in_ord_ind(ar, skb); - spin_unlock_bh(&htt->rx_ring.lock); - dev_kfree_skb_any(skb); - } - +exit: ath10k_htt_rx_msdu_buff_replenish(htt); + /* In case of rx failure or more data to read, report budget + * to reschedule NAPI poll + */ + done = resched_napi ? budget : quota; + + return done; } +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index f06dd3941bac..9169ab92a16a 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1502,12 +1502,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, ath10k_ce_per_engine_service(ar, pipe); } -void ath10k_pci_kill_tasklet(struct ath10k *ar) +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - tasklet_kill(&ar_pci->intr_tq); - del_timer_sync(&ar_pci->rx_post_retry); } @@ -1747,7 +1745,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) void ath10k_pci_flush(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); ath10k_pci_buffer_cleanup(ar); } @@ -2754,35 +2752,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg) return IRQ_NONE; } - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { - if (!ath10k_pci_irq_pending(ar)) - return IRQ_NONE; - - ath10k_pci_disable_and_clear_legacy_irq(ar); - } + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && + !ath10k_pci_irq_pending(ar)) + return IRQ_NONE; - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_disable_and_clear_legacy_irq(ar); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } -static void ath10k_pci_tasklet(unsigned long data) +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) { - struct ath10k *ar = (struct ath10k *)data; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + struct ath10k *ar = container_of(ctx, struct ath10k, napi); + int done = 0; if (ath10k_pci_has_fw_crashed(ar)) { - ath10k_pci_irq_disable(ar); ath10k_pci_fw_crashed_clear(ar); ath10k_pci_fw_crashed_dump(ar); - return; + napi_complete(ctx); + return done; } ath10k_ce_per_engine_service_any(ar); - /* Re-enable legacy irq that was disabled in the irq handler */ - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) + done = ath10k_htt_txrx_compl_task(ar, budget); + + if (done < budget) { + napi_complete(ctx); + /* In case of MSI, it is possible that interrupts are received + * while NAPI poll is inprogress. So pending interrupts that are + * received after processing all copy engine pipes by NAPI poll + * will not be handled again. This is causing failure to + * complete boot sequence in x86 platform. So before enabling + * interrupts safer to check for pending interrupts for + * immediate servicing. + */ + if (CE_INTERRUPT_SUMMARY(ar)) { + napi_reschedule(&ar->napi); + goto out; + } ath10k_pci_enable_legacy_irq(ar); + ath10k_pci_irq_msi_fw_unmask(ar); + } + +out: + return done; } static int ath10k_pci_request_irq_msi(struct ath10k *ar) @@ -2840,11 +2856,11 @@ static void ath10k_pci_free_irq(struct ath10k *ar) free_irq(ar_pci->pdev->irq, ar); } -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) +void ath10k_pci_init_napi(struct ath10k *ar) { - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, + ATH10K_NAPI_BUDGET); + napi_enable(&ar->napi); } static int ath10k_pci_init_irq(struct ath10k *ar) @@ -2852,7 +2868,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); int ret; - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) ath10k_info(ar, "limiting irq mode to: %d\n", @@ -3113,7 +3129,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) void ath10k_pci_release_resource(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); + netif_napi_del(&ar->napi); ath10k_pci_ce_deinit(ar); ath10k_pci_free_pipes(ar); } @@ -3274,7 +3291,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_free_irq: ath10k_pci_free_irq(ar); - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); err_deinit_irq: ath10k_pci_deinit_irq(ar); diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 6eca1df2ce60..974e463e8b95 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -177,8 +177,6 @@ struct ath10k_pci { /* Operating interrupt mode */ enum ath10k_pci_irq_mode oper_irq_mode; - struct tasklet_struct intr_tq; - struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; /* Copy Engine used for Diagnostic Accesses */ @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_rx_replenish_retry(unsigned long ptr); void ath10k_pci_ce_deinit(struct ath10k *ar); -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); -void ath10k_pci_kill_tasklet(struct ath10k *ar); +void ath10k_pci_init_napi(struct ath10k *ar); int ath10k_pci_init_pipes(struct ath10k *ar); int ath10k_pci_init_config(struct ath10k *ar); void ath10k_pci_rx_post(struct ath10k *ar);