diff mbox

[v2] ath10k: do not use coherent memory for allocated device memory chunks

Message ID 1448908321-3042-1-git-send-email-nbd@openwrt.org (mailing list archive)
State Not Applicable
Delegated to: Kalle Valo
Headers show

Commit Message

Felix Fietkau Nov. 30, 2015, 6:32 p.m. UTC
Coherent memory is more expensive to allocate (and constrained on some
architectures where it has to be pre-allocated). It is also completely
unnecessary, since the host has no reason to even access these allocated
memory spaces

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/wireless/ath/ath10k/wmi.c | 61 ++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 18 deletions(-)

Comments

Kalle Valo Dec. 8, 2015, 2:54 p.m. UTC | #1
Felix Fietkau <nbd@openwrt.org> writes:

> Coherent memory is more expensive to allocate (and constrained on some
> architectures where it has to be pre-allocated). It is also completely
> unnecessary, since the host has no reason to even access these allocated
> memory spaces
>
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>

Applied, thanks.
Ben Greear Dec. 30, 2015, 9:55 p.m. UTC | #2
On 11/30/2015 10:32 AM, Felix Fietkau wrote:
> Coherent memory is more expensive to allocate (and constrained on some
> architectures where it has to be pre-allocated). It is also completely
> unnecessary, since the host has no reason to even access these allocated
> memory spaces
>
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>

> -	memset(ar->wmi.mem_chunks[idx].vaddr, 0, pool_size);
> +	if (!num_units)
> +		return -ENOMEM;
> +
> +	paddr = dma_map_single(ar->dev, vaddr, pool_size, DMA_TO_DEVICE);
> +	if (dma_mapping_error(ar->dev, paddr)) {
> +		kfree(vaddr);
> +		return -ENOMEM;
> +	}

Are you sure you have the direction correct for the 'dma_map_single'
call?

I thought this memory was for the NIC to scribble in, and probably
host should never even bother reading or writing it?

The reason I started looking at this is that I see these errors when
trying to use a 4x4 wave-2 ath10k NIC in a 4.4.0-rc7 ath kernel:

[  202.489625] wlan0: authenticate with 00:0e:8e:f8:73:96
[  202.784533] DMAR: DRHD: handling fault status reg 3
[  202.786246] wlan0: send auth to 00:0e:8e:f8:73:96 (try 1/3)
[  202.788133] DMAR: DMAR:[DMA Write] Request device [05:00.0] fault addr ff5de000
DMAR:[fault reason 05] PTE Write access is not set
[  202.887410] wlan0: send auth to 00:0e:8e:f8:73:96 (try 2/3)
[  202.988423] wlan0: send auth to 00:0e:8e:f8:73:96 (try 3/3)
[  203.089437] wlan0: authentication with 00:0e:8e:f8:73:96 timed out

This is on an Intel x86-64 system with IOMMU (VT-d) enabled.

I'm likely having more than one problem since a 4.3.0-rc6+ kernel is not
working with this NIC either...

Thanks,
Ben
diff mbox

Patch

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 9021079..1386dd8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -4300,34 +4300,58 @@  void ath10k_wmi_event_vdev_resume_req(struct ath10k *ar, struct sk_buff *skb)
 	ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_RESUME_REQ_EVENTID\n");
 }
 
-static int ath10k_wmi_alloc_host_mem(struct ath10k *ar, u32 req_id,
-				     u32 num_units, u32 unit_len)
+static int ath10k_wmi_alloc_chunk(struct ath10k *ar, u32 req_id,
+				  u32 num_units, u32 unit_len)
 {
 	dma_addr_t paddr;
-	u32 pool_size;
+	u32 pool_size = 0;
 	int idx = ar->wmi.num_mem_chunks;
+	void *vaddr = NULL;
 
-	pool_size = num_units * round_up(unit_len, 4);
+	if (ar->wmi.num_mem_chunks == ARRAY_SIZE(ar->wmi.mem_chunks))
+		return -ENOMEM;
 
-	if (!pool_size)
-		return -EINVAL;
+	while (!vaddr && num_units) {
+		pool_size = num_units * round_up(unit_len, 4);
+		if (!pool_size)
+			return -EINVAL;
 
-	ar->wmi.mem_chunks[idx].vaddr = dma_alloc_coherent(ar->dev,
-							   pool_size,
-							   &paddr,
-							   GFP_KERNEL);
-	if (!ar->wmi.mem_chunks[idx].vaddr) {
-		ath10k_warn(ar, "failed to allocate memory chunk\n");
-		return -ENOMEM;
+		vaddr = kzalloc(pool_size, GFP_KERNEL | __GFP_NOWARN);
+		if (!vaddr)
+			num_units /= 2;
 	}
 
-	memset(ar->wmi.mem_chunks[idx].vaddr, 0, pool_size);
+	if (!num_units)
+		return -ENOMEM;
+
+	paddr = dma_map_single(ar->dev, vaddr, pool_size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ar->dev, paddr)) {
+		kfree(vaddr);
+		return -ENOMEM;
+	}
 
+	ar->wmi.mem_chunks[idx].vaddr = vaddr;
 	ar->wmi.mem_chunks[idx].paddr = paddr;
 	ar->wmi.mem_chunks[idx].len = pool_size;
 	ar->wmi.mem_chunks[idx].req_id = req_id;
 	ar->wmi.num_mem_chunks++;
 
+	return num_units;
+}
+
+static int ath10k_wmi_alloc_host_mem(struct ath10k *ar, u32 req_id,
+				     u32 num_units, u32 unit_len)
+{
+	int ret;
+
+	while (num_units) {
+		ret = ath10k_wmi_alloc_chunk(ar, req_id, num_units, unit_len);
+		if (ret < 0)
+			return ret;
+
+		num_units -= ret;
+	}
+
 	return 0;
 }
 
@@ -7705,10 +7729,11 @@  void ath10k_wmi_free_host_mem(struct ath10k *ar)
 
 	/* free the host memory chunks requested by firmware */
 	for (i = 0; i < ar->wmi.num_mem_chunks; i++) {
-		dma_free_coherent(ar->dev,
-				  ar->wmi.mem_chunks[i].len,
-				  ar->wmi.mem_chunks[i].vaddr,
-				  ar->wmi.mem_chunks[i].paddr);
+		dma_unmap_single(ar->dev,
+				 ar->wmi.mem_chunks[i].paddr,
+				 ar->wmi.mem_chunks[i].len,
+				 DMA_TO_DEVICE);
+		kfree(ar->wmi.mem_chunks[i].vaddr);
 	}
 
 	ar->wmi.num_mem_chunks = 0;