diff mbox series

[RFC,1/2] ath11k: drop tx_status_fifo

Message ID 20190628101127.6044-1-john@phrozen.org (mailing list archive)
State RFC
Delegated to: Kalle Valo
Headers show
Series [RFC,1/2] ath11k: drop tx_status_fifo | expand

Commit Message

John Crispin June 28, 2019, 10:11 a.m. UTC
perf top showed that 10% of the cpu time was spent on the memcpy used to
populate the fifo when the device is under high load. There is no apparent
benefit from using a fifo so lets just drop it.

Signed-off-by: John Crispin <john@phrozen.org>
---
 drivers/net/wireless/ath/ath11k/dp.c     | 13 ---------
 drivers/net/wireless/ath/ath11k/dp.h     |  5 ----
 drivers/net/wireless/ath/ath11k/dp_tx.c  | 37 +++++-------------------
 drivers/net/wireless/ath/ath11k/hal_tx.c |  5 ----
 drivers/net/wireless/ath/ath11k/hal_tx.h |  1 -
 5 files changed, 8 insertions(+), 53 deletions(-)

Comments

Vasanthakumar Thiagarajan June 28, 2019, 11:07 a.m. UTC | #1
On 2019-06-28 15:41, John Crispin wrote:
> perf top showed that 10% of the cpu time was spent on the memcpy used 
> to
> populate the fifo when the device is under high load. There is no 
> apparent
> benefit from using a fifo so lets just drop it.
> 
> Signed-off-by: John Crispin <john@phrozen.org>
> ---
>  drivers/net/wireless/ath/ath11k/dp.c     | 13 ---------
>  drivers/net/wireless/ath/ath11k/dp.h     |  5 ----
>  drivers/net/wireless/ath/ath11k/dp_tx.c  | 37 +++++-------------------
>  drivers/net/wireless/ath/ath11k/hal_tx.c |  5 ----
>  drivers/net/wireless/ath/ath11k/hal_tx.h |  1 -
>  5 files changed, 8 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/net/wireless/ath/ath11k/dp.c
> b/drivers/net/wireless/ath/ath11k/dp.c
> index 3f5174a0c622..7e0a67d9e5c1 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.c
> +++ b/drivers/net/wireless/ath/ath11k/dp.c
> @@ -839,10 +839,6 @@ void ath11k_dp_free(struct ath11k_base *sc)
>  			     ath11k_dp_tx_pending_cleanup, sc);
>  		idr_destroy(&dp->tx_ring[i].txbuf_idr);
>  		spin_unlock_bh(&dp->tx_ring[i].tx_idr_lock);
> -
> -		spin_lock_bh(&dp->tx_ring[i].tx_status_lock);
> -		kfifo_free(&dp->tx_ring[i].tx_status_fifo);
> -		spin_unlock_bh(&dp->tx_ring[i].tx_status_lock);
>  	}
> 
>  	/* Deinit any SOC level resource */
> @@ -888,12 +884,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
>  		idr_init(&dp->tx_ring[i].txbuf_idr);
>  		spin_lock_init(&dp->tx_ring[i].tx_idr_lock);
>  		dp->tx_ring[i].tcl_data_ring_id = i;
> -
> -		spin_lock_init(&dp->tx_ring[i].tx_status_lock);
> -		ret = kfifo_alloc(&dp->tx_ring[i].tx_status_fifo, size,
> -				  GFP_KERNEL);
> -		if (ret)
> -			goto fail_cmn_srng_cleanup;
>  	}
> 
>  	for (i = 0; i < HAL_DSCP_TID_MAP_TBL_NUM_ENTRIES_MAX; i++)
> @@ -903,9 +893,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
> 
>  	return 0;
> 
> -fail_cmn_srng_cleanup:
> -	ath11k_dp_srng_common_cleanup(sc);
> -
>  fail_link_desc_cleanup:
>  	ath11k_dp_link_desc_cleanup(sc, dp->link_desc_banks,
>  				    HAL_WBM_IDLE_LINK, &dp->wbm_idle_ring);
> diff --git a/drivers/net/wireless/ath/ath11k/dp.h
> b/drivers/net/wireless/ath/ath11k/dp.h
> index e09065690481..b654277cb68d 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.h
> +++ b/drivers/net/wireless/ath/ath11k/dp.h
> @@ -66,11 +66,6 @@ struct dp_tx_ring {
>  	u32 num_tx_pending;
>  	/* Protects txbuf_idr and num_pending */
>  	spinlock_t tx_idr_lock;
> -	DECLARE_KFIFO_PTR(tx_status_fifo, struct hal_wbm_release_ring);
> -	/* lock to protect tx_status_fifo because tx_status_fifo can be
> -	 * accessed concurrently.
> -	 */
> -	spinlock_t tx_status_lock;
>  };
> 
>  struct ath11k_pdev_mon_stats {
> diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c
> b/drivers/net/wireless/ath/ath11k/dp_tx.c
> index e7229f6289c1..b3d5995fc657 100644
> --- a/drivers/net/wireless/ath/ath11k/dp_tx.c
> +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
> @@ -448,47 +448,28 @@ void ath11k_dp_tx_completion_handler(struct
> ath11k_base *ab, int ring_id)
>  	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
>  	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
>  	struct sk_buff *msdu;
> -	struct hal_wbm_release_ring tx_status;
>  	struct hal_tx_status ts;
>  	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
> -	u32 *desc;
>  	u32 msdu_id;
> +	u32 *desc;
>  	u8 mac_id;
> 
>  	spin_lock_bh(&status_ring->lock);
> 
>  	ath11k_hal_srng_access_begin(ab, status_ring);
> 
> -	spin_lock_bh(&tx_ring->tx_status_lock);
> -	while (!kfifo_is_full(&tx_ring->tx_status_fifo) &&
> -	       (desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) 
> {
> -		ath11k_hal_tx_status_desc_sync((void *)desc,
> -					       (void *)&tx_status);
> -		kfifo_put(&tx_ring->tx_status_fifo, tx_status);
> -	}
> -
> -	if ((ath11k_hal_srng_dst_peek(ab, status_ring) != NULL) &&
> -	    kfifo_is_full(&tx_ring->tx_status_fifo)) {
> -		/* TODO: Process pending tx_status messages when kfifo_is_full() */
> -		ath11k_warn(ab, "Unable to process some of the tx_status ring desc
> because status_fifo is full\n");
> -	}
> -
> -	spin_unlock_bh(&tx_ring->tx_status_lock);
> -
> -	ath11k_hal_srng_access_end(ab, status_ring);
> -	spin_unlock_bh(&status_ring->lock);
> -
> -	spin_lock_bh(&tx_ring->tx_status_lock);
> -	while (kfifo_get(&tx_ring->tx_status_fifo, &tx_status)) {
> +	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) 
> {
>  		memset(&ts, 0, sizeof(ts));
> -		ath11k_hal_tx_status_parse(ab, &tx_status, &ts);
> +		ath11k_hal_tx_status_parse(ab,
> +					   (struct hal_wbm_release_ring *)desc,
> +					   &ts);
> 
>  		mac_id = FIELD_GET(DP_TX_DESC_ID_MAC_ID, ts.desc_id);
>  		msdu_id = FIELD_GET(DP_TX_DESC_ID_MSDU_ID, ts.desc_id);
> 
>  		if (ts.buf_rel_source == HAL_WBM_REL_SRC_MODULE_FW) {
>  			ath11k_dp_tx_process_htt_tx_complete(ab,
> -							     (void *)&tx_status,
> +							     (void *)desc,
>  							     mac_id, msdu_id,
>  							     tx_ring);

Delay in reaping tx completion desc from hw ring could trigger 
backpressure on tx completion ring which could result
in target assert. Extensive testing with this change is required to make 
sure this change does not cause any issue.

Vasanth
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 3f5174a0c622..7e0a67d9e5c1 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -839,10 +839,6 @@  void ath11k_dp_free(struct ath11k_base *sc)
 			     ath11k_dp_tx_pending_cleanup, sc);
 		idr_destroy(&dp->tx_ring[i].txbuf_idr);
 		spin_unlock_bh(&dp->tx_ring[i].tx_idr_lock);
-
-		spin_lock_bh(&dp->tx_ring[i].tx_status_lock);
-		kfifo_free(&dp->tx_ring[i].tx_status_fifo);
-		spin_unlock_bh(&dp->tx_ring[i].tx_status_lock);
 	}
 
 	/* Deinit any SOC level resource */
@@ -888,12 +884,6 @@  int ath11k_dp_alloc(struct ath11k_base *sc)
 		idr_init(&dp->tx_ring[i].txbuf_idr);
 		spin_lock_init(&dp->tx_ring[i].tx_idr_lock);
 		dp->tx_ring[i].tcl_data_ring_id = i;
-
-		spin_lock_init(&dp->tx_ring[i].tx_status_lock);
-		ret = kfifo_alloc(&dp->tx_ring[i].tx_status_fifo, size,
-				  GFP_KERNEL);
-		if (ret)
-			goto fail_cmn_srng_cleanup;
 	}
 
 	for (i = 0; i < HAL_DSCP_TID_MAP_TBL_NUM_ENTRIES_MAX; i++)
@@ -903,9 +893,6 @@  int ath11k_dp_alloc(struct ath11k_base *sc)
 
 	return 0;
 
-fail_cmn_srng_cleanup:
-	ath11k_dp_srng_common_cleanup(sc);
-
 fail_link_desc_cleanup:
 	ath11k_dp_link_desc_cleanup(sc, dp->link_desc_banks,
 				    HAL_WBM_IDLE_LINK, &dp->wbm_idle_ring);
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index e09065690481..b654277cb68d 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -66,11 +66,6 @@  struct dp_tx_ring {
 	u32 num_tx_pending;
 	/* Protects txbuf_idr and num_pending */
 	spinlock_t tx_idr_lock;
-	DECLARE_KFIFO_PTR(tx_status_fifo, struct hal_wbm_release_ring);
-	/* lock to protect tx_status_fifo because tx_status_fifo can be
-	 * accessed concurrently.
-	 */
-	spinlock_t tx_status_lock;
 };
 
 struct ath11k_pdev_mon_stats {
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index e7229f6289c1..b3d5995fc657 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -448,47 +448,28 @@  void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
 	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
 	struct sk_buff *msdu;
-	struct hal_wbm_release_ring tx_status;
 	struct hal_tx_status ts;
 	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
-	u32 *desc;
 	u32 msdu_id;
+	u32 *desc;
 	u8 mac_id;
 
 	spin_lock_bh(&status_ring->lock);
 
 	ath11k_hal_srng_access_begin(ab, status_ring);
 
-	spin_lock_bh(&tx_ring->tx_status_lock);
-	while (!kfifo_is_full(&tx_ring->tx_status_fifo) &&
-	       (desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
-		ath11k_hal_tx_status_desc_sync((void *)desc,
-					       (void *)&tx_status);
-		kfifo_put(&tx_ring->tx_status_fifo, tx_status);
-	}
-
-	if ((ath11k_hal_srng_dst_peek(ab, status_ring) != NULL) &&
-	    kfifo_is_full(&tx_ring->tx_status_fifo)) {
-		/* TODO: Process pending tx_status messages when kfifo_is_full() */
-		ath11k_warn(ab, "Unable to process some of the tx_status ring desc because status_fifo is full\n");
-	}
-
-	spin_unlock_bh(&tx_ring->tx_status_lock);
-
-	ath11k_hal_srng_access_end(ab, status_ring);
-	spin_unlock_bh(&status_ring->lock);
-
-	spin_lock_bh(&tx_ring->tx_status_lock);
-	while (kfifo_get(&tx_ring->tx_status_fifo, &tx_status)) {
+	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
 		memset(&ts, 0, sizeof(ts));
-		ath11k_hal_tx_status_parse(ab, &tx_status, &ts);
+		ath11k_hal_tx_status_parse(ab,
+					   (struct hal_wbm_release_ring *)desc,
+					   &ts);
 
 		mac_id = FIELD_GET(DP_TX_DESC_ID_MAC_ID, ts.desc_id);
 		msdu_id = FIELD_GET(DP_TX_DESC_ID_MSDU_ID, ts.desc_id);
 
 		if (ts.buf_rel_source == HAL_WBM_REL_SRC_MODULE_FW) {
 			ath11k_dp_tx_process_htt_tx_complete(ab,
-							     (void *)&tx_status,
+							     (void *)desc,
 							     mac_id, msdu_id,
 							     tx_ring);
 			continue;
@@ -511,12 +492,10 @@  void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 		if (atomic_dec_and_test(&ar->dp.num_tx_pending))
 			wake_up(&ar->dp.tx_empty_waitq);
 
-		/* TODO: Locking optimization so that tx_completion for an msdu
-		 * is not called with tx_status_lock acquired
-		 */
 		ath11k_dp_tx_complete_msdu(ar, msdu, &ts);
 	}
-	spin_unlock_bh(&tx_ring->tx_status_lock);
+	ath11k_hal_srng_access_end(ab, status_ring);
+	spin_unlock_bh(&status_ring->lock);
 }
 
 int ath11k_dp_tx_send_reo_cmd(struct ath11k_base *ab, struct dp_rx_tid *rx_tid,
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index 3c04ebe23c22..899fb9356f2d 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -79,11 +79,6 @@  void ath11k_hal_tx_desc_sync(void *tx_desc_cached, void *hw_desc)
 	       sizeof(struct hal_tcl_data_cmd));
 }
 
-void ath11k_hal_tx_status_desc_sync(void *hw_desc, void *local_desc)
-{
-	memcpy(local_desc, hw_desc, HAL_TX_STATUS_DESC_LEN);
-}
-
 void ath11k_hal_tx_status_parse(struct ath11k_base *ab,
 				struct hal_wbm_release_ring *desc,
 				struct hal_tx_status *ts)
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.h b/drivers/net/wireless/ath/ath11k/hal_tx.h
index 3b12dfda7423..b37d585355a0 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.h
@@ -70,7 +70,6 @@  void ath11k_hal_tx_desc_sync(void *tx_desc_cached, void *hw_desc);
 void ath11k_hal_tx_status_parse(struct ath11k_base *ab,
 				struct hal_wbm_release_ring *desc,
 				struct hal_tx_status *ts);
-void ath11k_hal_tx_status_desc_sync(void *hw_desc, void *local_desc);
 void ath11k_hal_tx_set_dscp_tid_map(struct ath11k_base *ab, int id);
 int ath11k_hal_reo_cmd_send(struct ath11k_base *ab, struct hal_srng *srng,
 			    enum hal_reo_cmd_type type,