[v6,3/3] ath10k: add workqueue for RX path of sdio
diff mbox series

Message ID 1569402639-31720-4-git-send-email-wgong@codeaurora.org
State Accepted
Commit 67654b26c903de9949202f23f4864a74a09e22d5
Delegated to: Kalle Valo
Headers show
Series
  • ath10k: improve throughout of RX of sdio
Related show

Commit Message

Wen Gong Sept. 25, 2019, 9:10 a.m. UTC
RX has two parts: one reads data from SDIO, the other indicates the
packets to the upper stack. Currently a single thread does all of the
RX work, so the two parts run sequentially and throughput is low.
Running the two parts in parallel increases throughput.

This patch moves the indication to a workqueue, which results in a
significant performance improvement on the RX path.

UDP RX throughput is 200 Mbps without this patch, and it reaches
400 Mbps with this patch.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWPZ-1

Signed-off-by: Wen Gong <wgong@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/sdio.c | 35 +++++++++++++++++++++++++++++++---
 drivers/net/wireless/ath/ath10k/sdio.h | 11 +++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

Comments

Kalle Valo Oct. 24, 2019, 8:14 a.m. UTC | #1
Wen Gong <wgong@codeaurora.org> writes:

> For RX, it has two parts, one is to read data from sdio, another
> is to indicate the packets to upper stack. Recently it has only
> one thread to do all RX things, it results that it is sequential
> for RX and low throughout, change RX to parallel for the two parts
> will increase throughout.
>
> This patch move the indication to a workqueue, it results in
> significant performance improvement on RX path.
>
> Udp rx throughout is 200Mbps without this patch, and it arrives
> 400Mbps with this patch.
>
> Tested with QCA6174 SDIO with firmware
> WLAN.RMH.4.4.1-00017-QCARMSWPZ-1
>
> Signed-off-by: Wen Gong <wgong@codeaurora.org>

[...]

> --- a/drivers/net/wireless/ath/ath10k/sdio.h
> +++ b/drivers/net/wireless/ath/ath10k/sdio.h
> @@ -98,6 +98,12 @@
>  #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF 0xFFFEFFFF
>  #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON 0x10000
>  
> +struct ath10k_sdio_rx_request {
> +	struct list_head list;
> +	struct sk_buff *skb;
> +	struct ath10k_htc_ep *ep;
> +};

This is not used anymore, I removed it in the pending branch.
Kalle Valo Oct. 31, 2019, 9:08 a.m. UTC | #2
Wen Gong <wgong@codeaurora.org> writes:

> For RX, it has two parts, one is to read data from sdio, another
> is to indicate the packets to upper stack. Recently it has only
> one thread to do all RX things, it results that it is sequential
> for RX and low throughout, change RX to parallel for the two parts
> will increase throughout.
>
> This patch move the indication to a workqueue, it results in
> significant performance improvement on RX path.
>
> Udp rx throughout is 200Mbps without this patch, and it arrives
> 400Mbps with this patch.
>
> Tested with QCA6174 SDIO with firmware
> WLAN.RMH.4.4.1-00017-QCARMSWPZ-1
>
> Signed-off-by: Wen Gong <wgong@codeaurora.org>

[...]

> +static void ath10k_rx_indication_async_work(struct work_struct *work)
> +{
> +	struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
> +						   async_work_rx);
> +	struct ath10k *ar = ar_sdio->ar;
> +	struct ath10k_htc_ep *ep;
> +	struct ath10k_skb_cb *cb;
> +	struct sk_buff *skb;
> +
> +	while (true) {
> +		skb = skb_dequeue(&ar_sdio->rx_head);
> +		if (!skb)
> +			break;
> +		cb = ATH10K_SKB_CB(skb);
> +		ep = &ar->htc.endpoint[cb->eid];
> +		ep->ep_ops.ep_rx_complete(ar, skb);
> +	}
> +}

I just realised that this is RX path so we should use ATH10K_SKB_RXCB()
instead. I made the change below to this commit in pending branch:

https://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git/commit/?h=pending&id=28da1fe7a3ffa5c55c52328c21165d9efdf2e94c

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index af68eb5d0776..c5407f5080b2 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -124,6 +124,7 @@ struct ath10k_skb_cb {
 struct ath10k_skb_rxcb {
 	dma_addr_t paddr;
 	struct hlist_node hlist;
+	u8 eid;
 };
 
 static inline struct ath10k_skb_cb *ATH10K_SKB_CB(struct sk_buff *skb)
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index c34637881219..c7d09b07a382 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -419,7 +419,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
 	struct ath10k_htc *htc = &ar->htc;
 	struct ath10k_sdio_rx_data *pkt;
 	struct ath10k_htc_ep *ep;
-	struct ath10k_skb_cb *cb;
+	struct ath10k_skb_rxcb *cb;
 	enum ath10k_htc_ep_id id;
 	int ret, i, *n_lookahead_local;
 	u32 *lookaheads_local;
@@ -466,7 +466,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
 			goto out;
 
 		if (!pkt->trailer_only) {
-			cb = ATH10K_SKB_CB(pkt->skb);
+			cb = ATH10K_SKB_RXCB(pkt->skb);
 			cb->eid = id;
 
 			skb_queue_tail(&ar_sdio->rx_head, pkt->skb);
@@ -1333,14 +1333,14 @@ static void ath10k_rx_indication_async_work(struct work_struct *work)
 						   async_work_rx);
 	struct ath10k *ar = ar_sdio->ar;
 	struct ath10k_htc_ep *ep;
-	struct ath10k_skb_cb *cb;
+	struct ath10k_skb_rxcb *cb;
 	struct sk_buff *skb;
 
 	while (true) {
 		skb = skb_dequeue(&ar_sdio->rx_head);
 		if (!skb)
 			break;
-		cb = ATH10K_SKB_CB(skb);
+		cb = ATH10K_SKB_RXCB(skb);
 		ep = &ar->htc.endpoint[cb->eid];
 		ep->ep_ops.ep_rx_complete(ar, skb);
 	}
Wen Gong Nov. 1, 2019, 7:42 a.m. UTC | #3
On 2019-10-31 17:08, Kalle Valo wrote:
> I just realised that this is RX path so we should use
> ATH10K_SKB_RXCB()
> instead. I made the change below to this commit in pending branch:
> 
I will test with the new patch together with other performance patches.
> https://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git/commit/?h=pending&id=28da1fe7a3ffa5c55c52328c21165d9efdf2e94c
> 
> diff --git a/drivers/net/wireless/ath/ath10k/core.h
> b/drivers/net/wireless/ath/ath10k/core.h
> index af68eb5d0776..c5407f5080b2 100644
> --- a/drivers/net/wireless/ath/ath10k/core.h
> +++ b/drivers/net/wireless/ath/ath10k/core.h
> @@ -124,6 +124,7 @@ struct ath10k_skb_cb {
>  struct ath10k_skb_rxcb {
>  	dma_addr_t paddr;
>  	struct hlist_node hlist;
> +	u8 eid;
>  };
> 
>  static inline struct ath10k_skb_cb *ATH10K_SKB_CB(struct sk_buff *skb)
> diff --git a/drivers/net/wireless/ath/ath10k/sdio.c
> b/drivers/net/wireless/ath/ath10k/sdio.c
> index c34637881219..c7d09b07a382 100644
> --- a/drivers/net/wireless/ath/ath10k/sdio.c
> +++ b/drivers/net/wireless/ath/ath10k/sdio.c
> @@ -419,7 +419,7 @@ static int
> ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
>  	struct ath10k_htc *htc = &ar->htc;
>  	struct ath10k_sdio_rx_data *pkt;
>  	struct ath10k_htc_ep *ep;
> -	struct ath10k_skb_cb *cb;
> +	struct ath10k_skb_rxcb *cb;
>  	enum ath10k_htc_ep_id id;
>  	int ret, i, *n_lookahead_local;
>  	u32 *lookaheads_local;
> @@ -466,7 +466,7 @@ static int
> ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
>  			goto out;
> 
>  		if (!pkt->trailer_only) {
> -			cb = ATH10K_SKB_CB(pkt->skb);
> +			cb = ATH10K_SKB_RXCB(pkt->skb);
>  			cb->eid = id;
> 
>  			skb_queue_tail(&ar_sdio->rx_head, pkt->skb);
> @@ -1333,14 +1333,14 @@ static void
> ath10k_rx_indication_async_work(struct work_struct *work)
>  						   async_work_rx);
>  	struct ath10k *ar = ar_sdio->ar;
>  	struct ath10k_htc_ep *ep;
> -	struct ath10k_skb_cb *cb;
> +	struct ath10k_skb_rxcb *cb;
>  	struct sk_buff *skb;
> 
>  	while (true) {
>  		skb = skb_dequeue(&ar_sdio->rx_head);
>  		if (!skb)
>  			break;
> -		cb = ATH10K_SKB_CB(skb);
> +		cb = ATH10K_SKB_RXCB(skb);
>  		ep = &ar->htc.endpoint[cb->eid];
>  		ep->ep_ops.ep_rx_complete(ar, skb);
>  	}
Wen Gong Nov. 11, 2019, 10:47 a.m. UTC | #4
On 2019-11-01 15:42, Wen Gong wrote:
> On 2019-10-31 17:08, Kalle Valo wrote:
> 、> I just realised that this is RX path so we should use 
> ATH10K_SKB_RXCB()
>> instead. I made the change below to this commit in pending branch:
>> 
> I will test with the new patch together with other performance patches.
Hi Kalle, I have tested with the patches from the pending branch, and
the test was successful.
The result is the same as with the public review version which I tested before.

the patches I tested on pending branch:

ath10k: enable alt data of TX path for sdio
ath10k: add htt TX bundle for sdio
ath10k: disable TX complete indication of htt for sdio
ath10k: enable napi on RX path for sdio
ath10k: sdio: remove struct ath10k_sdio_rx_data::status
ath10k: sdio: cosmetic cleanup
ath10k: add workqueue for RX path of sdio
ath10k: change max RX bundle size from 8 to 32 for sdio
ath10k: enable RX bundle receive for sdio
Kalle Valo Nov. 11, 2019, 12:23 p.m. UTC | #5
Wen Gong <wgong@codeaurora.org> writes:

> On 2019-11-01 15:42, Wen Gong wrote:
>> On 2019-10-31 17:08, Kalle Valo wrote:
>> 、> I just realised that this is RX path so we should use
>> ATH10K_SKB_RXCB()
>>> instead. I made the change below to this commit in pending branch:
>>>
>> I will test with the new patch together with other performance patches.
> Hi Kalle, I have tested with the patches of pending branch, it is
> success.
> result is same with the public review which I tested before.
>
> the patches I tested on pending branch:
>
> ath10k: enable alt data of TX path for sdio
> ath10k: add htt TX bundle for sdio
> ath10k: disable TX complete indication of htt for sdio
> ath10k: enable napi on RX path for sdio
> ath10k: sdio: remove struct ath10k_sdio_rx_data::status
> ath10k: sdio: cosmetic cleanup
> ath10k: add workqueue for RX path of sdio
> ath10k: change max RX bundle size from 8 to 32 for sdio
> ath10k: enable RX bundle receive for sdio

Very good, thanks for testing.
Wen Gong Nov. 13, 2019, 3:37 a.m. UTC | #6
On 2019-11-11 20:23, Kalle Valo wrote:
> Wen Gong <wgong@codeaurora.org> writes:
> 
>> On 2019-11-01 15:42, Wen Gong wrote:
>>> On 2019-10-31 17:08, Kalle Valo wrote:
>>> 、> I just realised that this is RX path so we should use
>>> ATH10K_SKB_RXCB()
>>>> instead. I made the change below to this commit in pending branch:
>>>> 
>>> I will test with the new patch together with other performance 
>>> patches.
>> Hi Kalle, I have tested with the patches of pending branch, it is
>> success.
>> result is same with the public review which I tested before.
>> 
>> the patches I tested on pending branch:
>> 
>> ath10k: enable alt data of TX path for sdio
>> ath10k: add htt TX bundle for sdio
>> ath10k: disable TX complete indication of htt for sdio
>> ath10k: enable napi on RX path for sdio
>> ath10k: sdio: remove struct ath10k_sdio_rx_data::status
>> ath10k: sdio: cosmetic cleanup
>> ath10k: add workqueue for RX path of sdio
>> ath10k: change max RX bundle size from 8 to 32 for sdio
>> ath10k: enable RX bundle receive for sdio
> 
> Very good, thanks for testing.
Hi Kalle,

This patch causes connection failures with PSK mode APs:
ath10k: add workqueue for RX path of sdio

I have sent a patch to fix it:
ath10k: clear ieee80211_rx_status for sdio
Kalle Valo Nov. 22, 2019, 10:02 a.m. UTC | #7
Wen Gong <wgong@codeaurora.org> writes:

> On 2019-11-11 20:23, Kalle Valo wrote:
>> Wen Gong <wgong@codeaurora.org> writes:
>>
>>> On 2019-11-01 15:42, Wen Gong wrote:
>>>> On 2019-10-31 17:08, Kalle Valo wrote:
>>>> 、> I just realised that this is RX path so we should use
>>>> ATH10K_SKB_RXCB()
>>>>> instead. I made the change below to this commit in pending branch:
>>>>>
>>>> I will test with the new patch together with other performance
>>>> patches.
>>> Hi Kalle, I have tested with the patches of pending branch, it is
>>> success.
>>> result is same with the public review which I tested before.
>>>
>>> the patches I tested on pending branch:
>>>
>>> ath10k: enable alt data of TX path for sdio
>>> ath10k: add htt TX bundle for sdio
>>> ath10k: disable TX complete indication of htt for sdio
>>> ath10k: enable napi on RX path for sdio
>>> ath10k: sdio: remove struct ath10k_sdio_rx_data::status
>>> ath10k: sdio: cosmetic cleanup
>>> ath10k: add workqueue for RX path of sdio
>>> ath10k: change max RX bundle size from 8 to 32 for sdio
>>> ath10k: enable RX bundle receive for sdio
>>
>> Very good, thanks for testing.
>
> this patch will trigger connect fail for PSK mode AP:
> ath10k: add workqueue for RX path of sdio
>
> I have sent patch to fix it:
> ath10k: clear ieee80211_rx_status for sdio

Good catch! But as this patch is not yet applied I fixed this patch
instead with this:

--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2235,7 +2235,10 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt,
 
        hdr = (struct ieee80211_hdr *)skb->data;
        qos = ieee80211_is_data_qos(hdr->frame_control);
+
        rx_status = IEEE80211_SKB_RXCB(skb);
+       memset(rx_status, 0, sizeof(*rx_status));
+
        rx_status->chains |= BIT(0);
        if (rx->ppdu.combined_rssi == 0) {
                /* SDIO firmware does not provide signal */

Patch
diff mbox series

diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index a510101..ff02833 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -419,6 +419,7 @@  static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
 	struct ath10k_htc *htc = &ar->htc;
 	struct ath10k_sdio_rx_data *pkt;
 	struct ath10k_htc_ep *ep;
+	struct ath10k_skb_cb *cb;
 	enum ath10k_htc_ep_id id;
 	int ret, i, *n_lookahead_local;
 	u32 *lookaheads_local;
@@ -464,10 +465,16 @@  static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
 		if (ret)
 			goto out;
 
-		if (!pkt->trailer_only)
-			ep->ep_ops.ep_rx_complete(ar_sdio->ar, pkt->skb);
-		else
+		if (!pkt->trailer_only) {
+			cb = ATH10K_SKB_CB(pkt->skb);
+			cb->eid = id;
+
+			skb_queue_tail(&ar_sdio->rx_head, pkt->skb);
+			queue_work(ar->workqueue_aux,
+				   &ar_sdio->async_work_rx);
+		} else {
 			kfree_skb(pkt->skb);
+		}
 
 		/* The RX complete handler now owns the skb...*/
 		pkt->skb = NULL;
@@ -1317,6 +1324,25 @@  static void __ath10k_sdio_write_async(struct ath10k *ar,
 	ath10k_sdio_free_bus_req(ar, req);
 }
 
+static void ath10k_rx_indication_async_work(struct work_struct *work)
+{
+	struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
+						   async_work_rx);
+	struct ath10k *ar = ar_sdio->ar;
+	struct ath10k_htc_ep *ep;
+	struct ath10k_skb_cb *cb;
+	struct sk_buff *skb;
+
+	while (true) {
+		skb = skb_dequeue(&ar_sdio->rx_head);
+		if (!skb)
+			break;
+		cb = ATH10K_SKB_CB(skb);
+		ep = &ar->htc.endpoint[cb->eid];
+		ep->ep_ops.ep_rx_complete(ar, skb);
+	}
+}
+
 static void ath10k_sdio_write_async_work(struct work_struct *work)
 {
 	struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
@@ -2087,6 +2113,9 @@  static int ath10k_sdio_probe(struct sdio_func *func,
 	for (i = 0; i < ATH10K_SDIO_BUS_REQUEST_MAX_NUM; i++)
 		ath10k_sdio_free_bus_req(ar, &ar_sdio->bus_req[i]);
 
+	skb_queue_head_init(&ar_sdio->rx_head);
+	INIT_WORK(&ar_sdio->async_work_rx, ath10k_rx_indication_async_work);
+
 	dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, id->device);
 	switch (dev_id_base) {
 	case QCA_MANUFACTURER_ID_AR6005_BASE:
diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
index 00bd4ca..8aa0dbc 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.h
+++ b/drivers/net/wireless/ath/ath10k/sdio.h
@@ -98,6 +98,12 @@ 
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF 0xFFFEFFFF
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON 0x10000
 
+struct ath10k_sdio_rx_request {
+	struct list_head list;
+	struct sk_buff *skb;
+	struct ath10k_htc_ep *ep;
+};
+
 struct ath10k_sdio_bus_request {
 	struct list_head list;
 
@@ -187,6 +193,9 @@  struct ath10k_sdio {
 	struct ath10k_sdio_bus_request bus_req[ATH10K_SDIO_BUS_REQUEST_MAX_NUM];
 	/* free list of bus requests */
 	struct list_head bus_req_freeq;
+
+	struct sk_buff_head rx_head;
+
 	/* protects access to bus_req_freeq */
 	spinlock_t lock;
 
@@ -213,6 +222,8 @@  struct ath10k_sdio {
 	struct list_head wr_asyncq;
 	/* protects access to wr_asyncq */
 	spinlock_t wr_async_lock;
+
+	struct work_struct async_work_rx;
 };
 
 static inline struct ath10k_sdio *ath10k_sdio_priv(struct ath10k *ar)