diff mbox series

[02/12] ath11k: allocate dst ring descriptors from cacheable memory

Message ID 20210615211407.92233-3-jouni@codeaurora.org (mailing list archive)
State Changes Requested
Delegated to: Kalle Valo
Headers show
Series ath11k: optimizations in data path | expand

Commit Message

Jouni Malinen June 15, 2021, 9:13 p.m. UTC
From: P Praneesh <ppranees@codeaurora.org>

tcl_data and reo_dst rings are currently being allocated
using dma_alloc_coherent(), which is non-cacheable.

Allocating ring memory from a cacheable memory area
allows cached descriptor access and prefetching of the next
descriptors, which reduces CPU usage during
descriptor processing in NAPI.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1

Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Co-developed-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: P Praneesh <ppranees@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
---
 drivers/net/wireless/ath/ath11k/dp.c  | 34 +++++++++++++++++++++++----
 drivers/net/wireless/ath/ath11k/dp.h  |  1 +
 drivers/net/wireless/ath/ath11k/hal.c | 25 ++++++++++++++++++--
 drivers/net/wireless/ath/ath11k/hal.h |  1 +
 4 files changed, 54 insertions(+), 7 deletions(-)

Comments

Jeff Johnson June 28, 2021, 3:19 p.m. UTC | #1
On 2021-06-15 14:13, Jouni Malinen wrote:
> From: P Praneesh <ppranees@codeaurora.org>
> 
> tcl_data and reo_dst rings are currently being allocated
> using dma_allocate_coherent() which is non cachable.
> 
> Allocating ring memory from cacheable memory area
> allows cached descriptor access and prefetch next
> descriptors to optimize CPU usage during
> descriptor processing on NAPI.
> 
> Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
> Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1
> 
> Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
> Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
> Co-developed-by: Sriram R <srirrama@codeaurora.org>
> Signed-off-by: Sriram R <srirrama@codeaurora.org>
> Signed-off-by: P Praneesh <ppranees@codeaurora.org>
> Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
> ---
>  drivers/net/wireless/ath/ath11k/dp.c  | 34 +++++++++++++++++++++++----
>  drivers/net/wireless/ath/ath11k/dp.h  |  1 +
>  drivers/net/wireless/ath/ath11k/hal.c | 25 ++++++++++++++++++--
>  drivers/net/wireless/ath/ath11k/hal.h |  1 +
>  4 files changed, 54 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/wireless/ath/ath11k/dp.c
> b/drivers/net/wireless/ath/ath11k/dp.c
> index b0c8f6290099..cf869ebc209a 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.c
> +++ b/drivers/net/wireless/ath/ath11k/dp.c
> @@ -101,8 +101,11 @@ void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
>  	if (!ring->vaddr_unaligned)
>  		return;
> 
> -	dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
> -			  ring->paddr_unaligned);
> +	if (ring->cached)
> +		kfree(ring->vaddr_unaligned);
> +	else
> +		dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
> +				  ring->paddr_unaligned);
> 
>  	ring->vaddr_unaligned = NULL;
>  }
> @@ -222,6 +225,7 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
> struct dp_srng *ring,
>  	int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
>  	int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
>  	int ret;
> +	bool cached;
> 
>  	if (max_entries < 0 || entry_sz < 0)
>  		return -EINVAL;
> @@ -229,10 +233,25 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
> struct dp_srng *ring,
>  	if (num_entries > max_entries)
>  		num_entries = max_entries;
> 
> +	/* Allocate the reo dst and tx completion rings from cacheable memory */
> +	switch (type) {
> +	case HAL_REO_DST:
> +		cached = true;
> +	default:
> +		cached = false;
> +	}
> +
>  	ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
> -	ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
> -						   &ring->paddr_unaligned,
> -						   GFP_KERNEL);
> +
> +	if (cached) {
> +		ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
> +		ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);

Internal developers found that this causes a fault in the rx data path.
Suggested fix:
-               ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+               ring->paddr_unaligned = dma_map_single(ab->dev, ring->vaddr_unaligned,
+                                                      ring->size, DMA_FROM_DEVICE);


> +	} else {
> +		ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
> +							   &ring->paddr_unaligned,
> +							   GFP_KERNEL);
> +	}
> +
>  	if (!ring->vaddr_unaligned)
>  		return -ENOMEM;
> 
> @@ -292,6 +311,11 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
> struct dp_srng *ring,
>  		return -EINVAL;
>  	}
> 
> +	if (cached) {
> +		params.flags |= HAL_SRNG_FLAGS_CACHED;
> +		ring->cached = 1;
> +	}
> +
>  	ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, &params);
>  	if (ret < 0) {
>  		ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
> diff --git a/drivers/net/wireless/ath/ath11k/dp.h
> b/drivers/net/wireless/ath/ath11k/dp.h
> index ee768ccce46e..e6591488a28c 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.h
> +++ b/drivers/net/wireless/ath/ath11k/dp.h
> @@ -64,6 +64,7 @@ struct dp_srng {
>  	dma_addr_t paddr;
>  	int size;
>  	u32 ring_id;
> +	u8 cached;
>  };
> 
>  struct dp_rxdma_ring {
> diff --git a/drivers/net/wireless/ath/ath11k/hal.c
> b/drivers/net/wireless/ath/ath11k/hal.c
> index eaa0edca5576..a58e86e42b5b 100644
> --- a/drivers/net/wireless/ath/ath11k/hal.c
> +++ b/drivers/net/wireless/ath/ath11k/hal.c
> @@ -627,6 +627,21 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base 
> *ab,
> struct hal_srng *srng)
>  	return NULL;
>  }
> 
> +static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
> +					  struct hal_srng *srng)
> +{
> +	u32 *desc;
> +
> +	/* prefetch only if desc is available */
> +	desc = ath11k_hal_srng_dst_peek(ab, srng);
> +	if (likely(desc)) {
> +		dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
> +					(srng->entry_size * sizeof(u32)),
> +					DMA_FROM_DEVICE);
> +		prefetch(desc);
> +	}
> +}
> +
>  u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
>  					struct hal_srng *srng)
>  {
> @@ -642,6 +657,10 @@ u32 *ath11k_hal_srng_dst_get_next_entry(struct
> ath11k_base *ab,
>  	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
>  			      srng->ring_size;
> 
> +	/* Try to prefetch the next descriptor in the ring */
> +	if (srng->flags & HAL_SRNG_FLAGS_CACHED)
> +		ath11k_hal_srng_prefetch_desc(ab, srng);
> +
>  	return desc;
>  }
> 
> @@ -775,11 +794,13 @@ void ath11k_hal_srng_access_begin(struct 
> ath11k_base
> *ab, struct hal_srng *srng)
>  {
>  	lockdep_assert_held(&srng->lock);
> 
> -	if (srng->ring_dir == HAL_SRNG_DIR_SRC)
> +	if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
>  		srng->u.src_ring.cached_tp =
>  			*(volatile u32 *)srng->u.src_ring.tp_addr;
> -	else
> +	} else {
>  		srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
> +		ath11k_hal_srng_prefetch_desc(ab, srng);
> +	}
>  }
> 
>  /* Update cached ring head/tail pointers to HW.
> ath11k_hal_srng_access_begin()
> diff --git a/drivers/net/wireless/ath/ath11k/hal.h
> b/drivers/net/wireless/ath/ath11k/hal.h
> index 35ed3a14e200..0f4f9ce74354 100644
> --- a/drivers/net/wireless/ath/ath11k/hal.h
> +++ b/drivers/net/wireless/ath/ath11k/hal.h
> @@ -513,6 +513,7 @@ enum hal_srng_dir {
>  #define HAL_SRNG_FLAGS_DATA_TLV_SWAP		0x00000020
>  #define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN	0x00010000
>  #define HAL_SRNG_FLAGS_MSI_INTR			0x00020000
> +#define HAL_SRNG_FLAGS_CACHED                   0x20000000
>  #define HAL_SRNG_FLAGS_LMAC_RING		0x80000000
> 
>  #define HAL_SRNG_TLV_HDR_TAG		GENMASK(9, 1)
> --
> 2.25.1
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index b0c8f6290099..cf869ebc209a 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -101,8 +101,11 @@  void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
 	if (!ring->vaddr_unaligned)
 		return;
 
-	dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
-			  ring->paddr_unaligned);
+	if (ring->cached)
+		kfree(ring->vaddr_unaligned);
+	else
+		dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+				  ring->paddr_unaligned);
 
 	ring->vaddr_unaligned = NULL;
 }
@@ -222,6 +225,7 @@  int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
 	int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
 	int ret;
+	bool cached;
 
 	if (max_entries < 0 || entry_sz < 0)
 		return -EINVAL;
@@ -229,10 +233,25 @@  int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	if (num_entries > max_entries)
 		num_entries = max_entries;
 
+	/* Allocate the reo dst and tx completion rings from cacheable memory */
+	switch (type) {
+	case HAL_REO_DST:
+		cached = true;
+	default:
+		cached = false;
+	}
+
 	ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
-	ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
-						   &ring->paddr_unaligned,
-						   GFP_KERNEL);
+
+	if (cached) {
+		ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+		ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+	} else {
+		ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+							   &ring->paddr_unaligned,
+							   GFP_KERNEL);
+	}
+
 	if (!ring->vaddr_unaligned)
 		return -ENOMEM;
 
@@ -292,6 +311,11 @@  int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 		return -EINVAL;
 	}
 
+	if (cached) {
+		params.flags |= HAL_SRNG_FLAGS_CACHED;
+		ring->cached = 1;
+	}
+
 	ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, &params);
 	if (ret < 0) {
 		ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index ee768ccce46e..e6591488a28c 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -64,6 +64,7 @@  struct dp_srng {
 	dma_addr_t paddr;
 	int size;
 	u32 ring_id;
+	u8 cached;
 };
 
 struct dp_rxdma_ring {
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index eaa0edca5576..a58e86e42b5b 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -627,6 +627,21 @@  u32 *ath11k_hal_srng_dst_peek(struct ath11k_base *ab, struct hal_srng *srng)
 	return NULL;
 }
 
+static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
+					  struct hal_srng *srng)
+{
+	u32 *desc;
+
+	/* prefetch only if desc is available */
+	desc = ath11k_hal_srng_dst_peek(ab, srng);
+	if (likely(desc)) {
+		dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+					(srng->entry_size * sizeof(u32)),
+					DMA_FROM_DEVICE);
+		prefetch(desc);
+	}
+}
+
 u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 					struct hal_srng *srng)
 {
@@ -642,6 +657,10 @@  u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
 			      srng->ring_size;
 
+	/* Try to prefetch the next descriptor in the ring */
+	if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+
 	return desc;
 }
 
@@ -775,11 +794,13 @@  void ath11k_hal_srng_access_begin(struct ath11k_base *ab, struct hal_srng *srng)
 {
 	lockdep_assert_held(&srng->lock);
 
-	if (srng->ring_dir == HAL_SRNG_DIR_SRC)
+	if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
 		srng->u.src_ring.cached_tp =
 			*(volatile u32 *)srng->u.src_ring.tp_addr;
-	else
+	} else {
 		srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+	}
 }
 
 /* Update cached ring head/tail pointers to HW. ath11k_hal_srng_access_begin()
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 35ed3a14e200..0f4f9ce74354 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -513,6 +513,7 @@  enum hal_srng_dir {
 #define HAL_SRNG_FLAGS_DATA_TLV_SWAP		0x00000020
 #define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN	0x00010000
 #define HAL_SRNG_FLAGS_MSI_INTR			0x00020000
+#define HAL_SRNG_FLAGS_CACHED                   0x20000000
 #define HAL_SRNG_FLAGS_LMAC_RING		0x80000000
 
 #define HAL_SRNG_TLV_HDR_TAG		GENMASK(9, 1)