diff mbox

[v2,5/7] ARM: NOMMU: Introduce dma operations for noMMU

Message ID 1488885154-12429-6-git-send-email-vladimir.murzin@arm.com
State New, archived
Headers show

Commit Message

Vladimir Murzin March 7, 2017, 11:12 a.m. UTC
R/M classes of CPUs can have memory covered by an MPU, which in turn might
configure RAM as Normal, i.e. bufferable and cacheable. This breaks
dma_alloc_coherent() and friends, since data can now get stuck in caches
or be buffered.

This patch factors out DMA support for NOMMU configuration into
separate entity which provides dedicated dma_ops. We have to handle
there several cases:
- configurations with MMU/MPU setup
- configurations without MMU/MPU setup
- special case for M-class, since caches and MPU there are optional

In general we rely on the default DMA area for coherent allocations and/or
per-device memory reserves suitable for coherent DMA, so if such
regions are set up, coherent allocations are served from there.

In case the MMU/MPU was not set up, we fall back to the normal page
allocator for DMA memory allocation.

In case we run on M-class CPUs, for configurations without cache support
(like Cortex-M3/M4) DMA operations are forced to be coherent and wired
to dma-noop (this decision is made based on the cacheid global
variable); however, if caches are detected there and no DMA coherent
region is given (either default or per-device), DMA is disallowed even
if the MPU is not set up - this is because M-class implements a system
memory map which defines part of the address space as Normal memory.

Reported-by: Alexandre Torgue <alexandre.torgue@st.com>
Reported-by: Andras Szemzo <sza@esh.hu>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm/Kconfig                   |   1 +
 arch/arm/include/asm/dma-mapping.h |   2 +-
 arch/arm/mm/Makefile               |   5 +-
 arch/arm/mm/dma-mapping-nommu.c    | 253 +++++++++++++++++++++++++++++++++++++
 4 files changed, 257 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/mm/dma-mapping-nommu.c

Comments

kernel test robot March 9, 2017, 2:06 p.m. UTC | #1
Hi Vladimir,

[auto build test WARNING on next-20170308]
[also build test WARNING on v4.11-rc1]
[cannot apply to linux/master v4.9-rc8 v4.9-rc7 v4.9-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Vladimir-Murzin/ARM-Fix-dma_alloc_coherent-and-friends-for-NOMMU/20170309-193212
config: arm-allnoconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All warnings (new ones prefixed by >>):

   arch/arm/mm/dma-mapping-nommu.c: In function 'arm_nommu_dma_alloc':
>> arch/arm/mm/dma-mapping-nommu.c:42:28: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     struct dma_map_ops *ops = &dma_noop_ops;
                               ^
   arch/arm/mm/dma-mapping-nommu.c: In function 'arm_nommu_dma_free':
   arch/arm/mm/dma-mapping-nommu.c:64:28: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     struct dma_map_ops *ops = &dma_noop_ops;
                               ^
   arch/arm/mm/dma-mapping-nommu.c: In function 'arm_nommu_dma_mmap':
   arch/arm/mm/dma-mapping-nommu.c:78:28: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     struct dma_map_ops *ops = &dma_noop_ops;
                               ^
   arch/arm/mm/dma-mapping-nommu.c: In function 'arm_nommu_get_dma_map_ops':
>> arch/arm/mm/dma-mapping-nommu.c:207:34: warning: return discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     return coherent ? &dma_noop_ops : &arm_nommu_dma_ops;
            ~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~

vim +/const +42 arch/arm/mm/dma-mapping-nommu.c

    36	
    37	static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
    38					 dma_addr_t *dma_handle, gfp_t gfp,
    39					 unsigned long attrs)
    40	
    41	{
  > 42		struct dma_map_ops *ops = &dma_noop_ops;
    43	
    44		/*
    45		 * We are here because:
    46		 * - no consistent DMA region has been defined, so we can't
    47		 *   continue.
    48		 * - there is no space left in consistent DMA region, so we
    49		 *   only can fallback to generic allocator if we are
    50		 *   advertised that consistency is not required.
    51		 */
    52	
    53		if (attrs & DMA_ATTR_NON_CONSISTENT)
    54			return ops->alloc(dev, size, dma_handle, gfp, attrs);
    55	
    56		WARN_ON_ONCE(1);
    57		return NULL;
    58	}
    59	
    60	static void arm_nommu_dma_free(struct device *dev, size_t size,
    61				       void *cpu_addr, dma_addr_t dma_addr,
    62				       unsigned long attrs)
    63	{
  > 64		struct dma_map_ops *ops = &dma_noop_ops;
    65	
    66		if (attrs & DMA_ATTR_NON_CONSISTENT)
    67			ops->free(dev, size, cpu_addr, dma_addr, attrs);
    68		else
    69			WARN_ON_ONCE(1);
    70	
    71		return;
    72	}
    73	
    74	static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
    75				      void *cpu_addr, dma_addr_t dma_addr, size_t size,
    76				      unsigned long attrs)
    77	{
    78		struct dma_map_ops *ops = &dma_noop_ops;
    79		int ret;
    80	
    81		if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
    82			return ret;
    83	
    84		if (attrs & DMA_ATTR_NON_CONSISTENT)
    85			return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
    86	
    87		WARN_ON_ONCE(1);
    88		return -ENXIO;
    89	}
    90	
    91	static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size,
    92					  enum dma_data_direction dir)
    93	{
    94		dmac_map_area(__va(paddr), size, dir);
    95	
    96		if (dir == DMA_FROM_DEVICE)
    97			outer_inv_range(paddr, paddr + size);
    98		else
    99			outer_clean_range(paddr, paddr + size);
   100	}
   101	
   102	static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size,
   103					  enum dma_data_direction dir)
   104	{
   105		if (dir != DMA_TO_DEVICE) {
   106			outer_inv_range(paddr, paddr + size);
   107			dmac_unmap_area(__va(paddr), size, dir);
   108		}
   109	}
   110	
   111	static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page,
   112						 unsigned long offset, size_t size,
   113						 enum dma_data_direction dir,
   114						 unsigned long attrs)
   115	{
   116		dma_addr_t handle = page_to_phys(page) + offset;
   117	
   118		__dma_page_cpu_to_dev(handle, size, dir);
   119	
   120		return handle;
   121	}
   122	
   123	static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle,
   124					     size_t size, enum dma_data_direction dir,
   125					     unsigned long attrs)
   126	{
   127		__dma_page_dev_to_cpu(handle, size, dir);
   128	}
   129	
   130	
   131	static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl,
   132					int nents, enum dma_data_direction dir,
   133					unsigned long attrs)
   134	{
   135		int i;
   136		struct scatterlist *sg;
   137	
   138		for_each_sg(sgl, sg, nents, i) {
   139			sg_dma_address(sg) = sg_phys(sg);
   140			sg_dma_len(sg) = sg->length;
   141			__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
   142		}
   143	
   144		return nents;
   145	}
   146	
   147	static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
   148					   int nents, enum dma_data_direction dir,
   149					   unsigned long attrs)
   150	{
   151		struct scatterlist *sg;
   152		int i;
   153	
   154		for_each_sg(sgl, sg, nents, i)
   155			__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
   156	}
   157	
   158	static void arm_nommu_dma_sync_single_for_device(struct device *dev,
   159			dma_addr_t handle, size_t size, enum dma_data_direction dir)
   160	{
   161		__dma_page_cpu_to_dev(handle, size, dir);
   162	}
   163	
   164	static void arm_nommu_dma_sync_single_for_cpu(struct device *dev,
   165			dma_addr_t handle, size_t size, enum dma_data_direction dir)
   166	{
   167		__dma_page_cpu_to_dev(handle, size, dir);
   168	}
   169	
   170	static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
   171						     int nents, enum dma_data_direction dir)
   172	{
   173		struct scatterlist *sg;
   174		int i;
   175	
   176		for_each_sg(sgl, sg, nents, i)
   177			__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
   178	}
   179	
   180	static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
   181						  int nents, enum dma_data_direction dir)
   182	{
   183		struct scatterlist *sg;
   184		int i;
   185	
   186		for_each_sg(sgl, sg, nents, i)
   187			__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
   188	}
   189	
   190	const struct dma_map_ops arm_nommu_dma_ops = {
   191		.alloc			= arm_nommu_dma_alloc,
   192		.free			= arm_nommu_dma_free,
   193		.mmap			= arm_nommu_dma_mmap,
   194		.map_page		= arm_nommu_dma_map_page,
   195		.unmap_page		= arm_nommu_dma_unmap_page,
   196		.map_sg			= arm_nommu_dma_map_sg,
   197		.unmap_sg		= arm_nommu_dma_unmap_sg,
   198		.sync_single_for_device	= arm_nommu_dma_sync_single_for_device,
   199		.sync_single_for_cpu	= arm_nommu_dma_sync_single_for_cpu,
   200		.sync_sg_for_device	= arm_nommu_dma_sync_sg_for_device,
   201		.sync_sg_for_cpu	= arm_nommu_dma_sync_sg_for_cpu,
   202	};
   203	EXPORT_SYMBOL(arm_nommu_dma_ops);
   204	
   205	static struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent)
   206	{
 > 207		return coherent ? &dma_noop_ops : &arm_nommu_dma_ops;
   208	}
   209	
   210	void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6ab63fa..8f0b6ca 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -22,6 +22,7 @@  config ARM
 	select CLONE_BACKWARDS
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select DMA_NOOP_OPS if !MMU
 	select EDAC_SUPPORT
 	select EDAC_ATOMIC_SCRUB
 	select GENERIC_ALLOCATOR
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 7166569..63270de 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -20,7 +20,7 @@  static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
 	if (dev && dev->dma_ops)
 		return dev->dma_ops;
-	return &arm_dma_ops;
+	return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_noop_ops;
 }
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 54857bc..ea80df7 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -2,9 +2,8 @@ 
 # Makefile for the linux arm-specific parts of the memory manager.
 #
 
-obj-y				:= dma-mapping.o extable.o fault.o init.o \
-				   iomap.o
-
+obj-y				:= extable.o fault.o init.o iomap.o
+obj-y				+= dma-mapping$(MMUEXT).o
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
 				   mmap.o pgd.o mmu.o pageattr.o
 
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
new file mode 100644
index 0000000..ecbeadb
--- /dev/null
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -0,0 +1,253 @@ 
+/*
+ *  Based on linux/arch/arm/mm/dma-mapping.c
+ *
+ *  Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include <asm/cp15.h>
+
+#include "dma.h"
+
+/*
+ *  dma_noop_ops is used if
+ *   - MMU/MPU is off
+ *   - cpu is v7m w/o cache support
+ *   - device is coherent
+ *  otherwise arm_nommu_dma_ops is used.
+ *
+ *  arm_nommu_dma_ops relies on consistent DMA memory (please refer to
+ *  [1] on how to declare such memory).
+ *
+ *  [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
+
+/*
+ * .alloc callback of arm_nommu_dma_ops.
+ *
+ * Requests carrying DMA_ATTR_NON_CONSISTENT are forwarded to
+ * dma_noop_ops.alloc and its result is returned unchanged.  Every other
+ * request triggers a one-time warning and returns NULL.
+ */
+static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t gfp,
+				 unsigned long attrs)
+{
+	/* dma_noop_ops is const, so the pointer to it must be const too. */
+	const struct dma_map_ops *ops = &dma_noop_ops;
+
+	/*
+	 * We are here because:
+	 * - no consistent DMA region has been defined, so we can't
+	 *   continue.
+	 * - there is no space left in consistent DMA region, so we
+	 *   only can fallback to generic allocator if we are
+	 *   advertised that consistency is not required.
+	 */
+
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
+		return ops->alloc(dev, size, dma_handle, gfp, attrs);
+
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
+/*
+ * .free callback of arm_nommu_dma_ops.
+ *
+ * Buffers flagged DMA_ATTR_NON_CONSISTENT are handed to
+ * dma_noop_ops.free; any other request only triggers a one-time warning.
+ */
+static void arm_nommu_dma_free(struct device *dev, size_t size,
+			       void *cpu_addr, dma_addr_t dma_addr,
+			       unsigned long attrs)
+{
+	/* dma_noop_ops is const, so the pointer to it must be const too. */
+	const struct dma_map_ops *ops = &dma_noop_ops;
+
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
+		ops->free(dev, size, cpu_addr, dma_addr, attrs);
+	else
+		WARN_ON_ONCE(1);
+}
+
+/*
+ * .mmap callback of arm_nommu_dma_ops.
+ *
+ * dma_mmap_from_coherent() is tried first; when it returns non-zero the
+ * status it stored in 'ret' is returned.  Otherwise buffers flagged
+ * DMA_ATTR_NON_CONSISTENT are forwarded to dma_noop_ops.mmap, and any
+ * other request triggers a one-time warning and returns -ENXIO.
+ */
+static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			      unsigned long attrs)
+{
+	/* dma_noop_ops is const, so the pointer to it must be const too. */
+	const struct dma_map_ops *ops = &dma_noop_ops;
+	int ret;
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
+		return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+
+	WARN_ON_ONCE(1);
+	return -ENXIO;
+}
+
+/*
+ * Cache maintenance for the CPU -> device direction.
+ *
+ * Calls dmac_map_area() on the kernel virtual address of the range, then
+ * invalidates the outer cache for DMA_FROM_DEVICE or cleans it for any
+ * other direction.
+ */
+static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size,
+				  enum dma_data_direction dir)
+{
+	dmac_map_area(__va(paddr), size, dir);
+
+	if (dir != DMA_FROM_DEVICE) {
+		outer_clean_range(paddr, paddr + size);
+		return;
+	}
+
+	outer_inv_range(paddr, paddr + size);
+}
+
+/*
+ * Cache maintenance for the device -> CPU direction.
+ *
+ * Nothing is done for DMA_TO_DEVICE.  For every other direction the
+ * outer cache range is invalidated and dmac_unmap_area() is called on
+ * the kernel virtual address of the range.
+ */
+static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size,
+				  enum dma_data_direction dir)
+{
+	if (dir == DMA_TO_DEVICE)
+		return;
+
+	outer_inv_range(paddr, paddr + size);
+	dmac_unmap_area(__va(paddr), size, dir);
+}
+
+/*
+ * .map_page callback: the DMA address is the physical address of the
+ * page plus 'offset'.  CPU -> device cache maintenance is performed on
+ * the range before the address is returned.
+ */
+static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page,
+					 unsigned long offset, size_t size,
+					 enum dma_data_direction dir,
+					 unsigned long attrs)
+{
+	dma_addr_t dma_addr;
+
+	dma_addr = page_to_phys(page) + offset;
+	__dma_page_cpu_to_dev(dma_addr, size, dir);
+
+	return dma_addr;
+}
+
+/*
+ * .unmap_page callback: performs device -> CPU cache maintenance on the
+ * range starting at 'handle'.
+ */
+static void arm_nommu_dma_unmap_page(struct device *dev,
+				     dma_addr_t handle, size_t size,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	__dma_page_dev_to_cpu(handle, size, dir);
+}
+
+
+/*
+ * .map_sg callback: for each of the 'nents' entries, sets the DMA address
+ * to the entry's physical address and the DMA length to its length, then
+ * performs CPU -> device cache maintenance on it.
+ *
+ * Always returns 'nents'.
+ */
+static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl,
+				int nents, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sgl, entry, nents, idx) {
+		sg_dma_address(entry) = sg_phys(entry);
+		sg_dma_len(entry) = entry->length;
+		__dma_page_cpu_to_dev(sg_dma_address(entry),
+				      sg_dma_len(entry), dir);
+	}
+
+	return nents;
+}
+
+/*
+ * .unmap_sg callback: performs device -> CPU cache maintenance on each of
+ * the 'nents' scatterlist entries.
+ */
+static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
+				   int nents, enum dma_data_direction dir,
+				   unsigned long attrs)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sgl, entry, nents, idx) {
+		__dma_page_dev_to_cpu(sg_dma_address(entry),
+				      sg_dma_len(entry), dir);
+	}
+}
+
+/*
+ * .sync_single_for_device callback: performs CPU -> device cache
+ * maintenance on the range starting at 'handle'.
+ */
+static void arm_nommu_dma_sync_single_for_device(struct device *dev,
+						 dma_addr_t handle,
+						 size_t size,
+						 enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(handle, size, dir);
+}
+
+/*
+ * .sync_single_for_cpu callback: performs device -> CPU cache maintenance
+ * on the range starting at 'handle'.
+ *
+ * This uses __dma_page_dev_to_cpu(), matching
+ * arm_nommu_dma_sync_sg_for_cpu() and arm_nommu_dma_unmap_page().  The
+ * CPU -> device helper would clean, rather than invalidate, the outer
+ * cache for every direction other than DMA_FROM_DEVICE.
+ */
+static void arm_nommu_dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_dev_to_cpu(handle, size, dir);
+}
+
+/*
+ * .sync_sg_for_device callback: performs CPU -> device cache maintenance
+ * on each of the 'nents' scatterlist entries.
+ */
+static void arm_nommu_dma_sync_sg_for_device(struct device *dev,
+					     struct scatterlist *sgl,
+					     int nents,
+					     enum dma_data_direction dir)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sgl, entry, nents, idx) {
+		__dma_page_cpu_to_dev(sg_dma_address(entry),
+				      sg_dma_len(entry), dir);
+	}
+}
+
+/*
+ * .sync_sg_for_cpu callback: performs device -> CPU cache maintenance on
+ * each of the 'nents' scatterlist entries.
+ */
+static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev,
+					  struct scatterlist *sgl,
+					  int nents,
+					  enum dma_data_direction dir)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sgl, entry, nents, idx) {
+		__dma_page_dev_to_cpu(sg_dma_address(entry),
+				      sg_dma_len(entry), dir);
+	}
+}
+
+/*
+ * DMA operations selected by arm_nommu_get_dma_map_ops() for devices that
+ * are not treated as coherent.
+ */
+const struct dma_map_ops arm_nommu_dma_ops = {
+	/* buffer allocation */
+	.alloc = arm_nommu_dma_alloc,
+	.free = arm_nommu_dma_free,
+	.mmap = arm_nommu_dma_mmap,
+	/* streaming mappings */
+	.map_page = arm_nommu_dma_map_page,
+	.unmap_page = arm_nommu_dma_unmap_page,
+	.map_sg = arm_nommu_dma_map_sg,
+	.unmap_sg = arm_nommu_dma_unmap_sg,
+	/* explicit synchronisation */
+	.sync_single_for_device = arm_nommu_dma_sync_single_for_device,
+	.sync_single_for_cpu = arm_nommu_dma_sync_single_for_cpu,
+	.sync_sg_for_device = arm_nommu_dma_sync_sg_for_device,
+	.sync_sg_for_cpu = arm_nommu_dma_sync_sg_for_cpu,
+};
+EXPORT_SYMBOL(arm_nommu_dma_ops);
+
+/*
+ * Pick the DMA operations for a device: dma_noop_ops when it is treated
+ * as coherent, arm_nommu_dma_ops otherwise.  Both tables are const, so
+ * the returned pointer is const-qualified as well.
+ */
+static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &dma_noop_ops : &arm_nommu_dma_ops;
+}
+
+/*
+ * Decide whether 'dev' is treated as DMA coherent, record the decision in
+ * dev->archdata.dma_coherent and install the matching DMA operations.
+ *
+ * 'dma_base', 'size' and 'iommu' are not used here.
+ */
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	const struct dma_map_ops *dma_ops;
+
+	if (IS_ENABLED(CONFIG_CPU_V7M)) {
+		/*
+		 * Cache support for v7m is optional, so can be treated as
+		 * coherent if no cache has been detected. Note that it is not
+		 * enough to check if MPU is in use or not since in absence of
+		 * MPU system memory map is used.
+		 */
+		dev->archdata.dma_coherent = (cacheid) ? coherent : true;
+	} else {
+		/*
+		 * Assume coherent DMA in case MMU/MPU has not been set up.
+		 */
+		dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true;
+	}
+
+	dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent);
+
+	set_dma_ops(dev, dma_ops);
+}
+
+/* Counterpart of arch_setup_dma_ops(); intentionally does nothing. */
+void arch_teardown_dma_ops(struct device *dev)
+{
+	/* empty */
+}
+
+/* Reports every DMA mask as supported: always returns 1. */
+int dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+/* Entry count passed to dma_debug_init() below. */
+#define PREALLOC_DMA_DEBUG_ENTRIES	4096
+
+/*
+ * Initcall (registered via core_initcall) that sets up DMA debugging
+ * with PREALLOC_DMA_DEBUG_ENTRIES entries.  Always returns 0.
+ */
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+core_initcall(dma_debug_do_init);