[147/190] drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
diff mbox

Message ID 1452510091-6833-6-git-send-email-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson Jan. 11, 2016, 11 a.m. UTC
On an Ivybridge i7-3720qm with 1600MHz DDR3, with 32 fences,
Upload rate for 2 linear surfaces:  8134MiB/s -> 8154MiB/s
Upload rate for 2 tiled surfaces:   8625MiB/s -> 8632MiB/s
Upload rate for 4 linear surfaces:  8127MiB/s -> 8134MiB/s
Upload rate for 4 tiled surfaces:   8602MiB/s -> 8629MiB/s
Upload rate for 8 linear surfaces:  8124MiB/s -> 8137MiB/s
Upload rate for 8 tiled surfaces:   8603MiB/s -> 8624MiB/s
Upload rate for 16 linear surfaces: 8123MiB/s -> 8128MiB/s
Upload rate for 16 tiled surfaces:  8606MiB/s -> 8618MiB/s
Upload rate for 32 linear surfaces: 8121MiB/s -> 8128MiB/s
Upload rate for 32 tiled surfaces:  8605MiB/s -> 8614MiB/s
Upload rate for 64 linear surfaces: 8121MiB/s -> 8127MiB/s
Upload rate for 64 tiled surfaces:  3017MiB/s -> 5202MiB/s

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
---
 drivers/gpu/drm/Makefile           |   2 +-
 drivers/gpu/drm/i915/Makefile      |   5 +-
 drivers/gpu/drm/i915/i915_drv.h    |   4 ++
 drivers/gpu/drm/i915/i915_gem.c    |  46 +++-----------
 drivers/gpu/drm/i915/i915_memory.c | 122 +++++++++++++++++++++++++++++++++++++
 5 files changed, 138 insertions(+), 41 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_memory.c

Patch
diff mbox

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index f858aa25fbb2..6834d0e33741 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -43,7 +43,7 @@  obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_MGA)	+= mga/
 obj-$(CONFIG_DRM_I810)	+= i810/
-obj-$(CONFIG_DRM_I915)  += i915/
+obj-y += i915/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_VC4)  += vc4/
 obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 79d657f29241..a362425ef862 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -100,6 +100,9 @@  i915-y += i915_vgpu.o
 # legacy horrors
 i915-y += i915_dma.o
 
-obj-$(CONFIG_DRM_I915)  += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
+ifdef CONFIG_DRM_I915
+obj-y += i915_memory.o
+endif
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 45b8cbdfab55..e6f49175af1b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3447,4 +3447,8 @@  static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	return false;
 }
 
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e321fdd90d2..1fa4752682d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1449,7 +1449,6 @@  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_vma *ggtt;
 	pgoff_t page_offset;
-	unsigned long pfn;
 	int ret = 0;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
@@ -1517,44 +1516,13 @@  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto unpin;
 
 	/* Finally, remap it using the new GTT offset */
-	pfn = dev_priv->gtt.mappable_base + ggtt->node.start;
-	pfn >>= PAGE_SHIFT;
-
-	if (ggtt->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
-		if (!obj->fault_mappable) {
-			unsigned long size = min_t(unsigned long,
-						   vma->vm_end - vma->vm_start,
-						   obj->base.size);
-			int i;
-
-			for (i = 0; i < size >> PAGE_SHIFT; i++) {
-				ret = vm_insert_pfn(vma,
-						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
-						    pfn + i);
-				if (ret)
-					break;
-			}
-		} else
-			ret = vm_insert_pfn(vma,
-					    (unsigned long)vmf->virtual_address,
-					    pfn + page_offset);
-	} else {
-		/* Overriding existing pages in partial view does not cause
-		 * us any trouble as TLBs are still valid because the fault
-		 * is due to userspace losing part of the mapping or never
-		 * having accessed it before (at this partials' range).
-		 */
-		const struct i915_ggtt_view *view = &ggtt->ggtt_view;
-		unsigned long base = vma->vm_start +
-				     (view->params.partial.offset << PAGE_SHIFT);
-		unsigned int i;
-
-		for (i = 0; i < view->params.partial.size; i++) {
-			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
-			if (ret)
-				break;
-		}
-	}
+	ret = remap_io_mapping(vma,
+			       vma->vm_start + (ggtt->ggtt_view.params.partial.offset << PAGE_SHIFT),
+			       (dev_priv->gtt.mappable_base + ggtt->node.start) >> PAGE_SHIFT,
+			       min_t(u64, ggtt->size, vma->vm_end - vma->vm_start),
+			       &dev_priv->gtt.mappable);
+	if (ret)
+		goto unpin;
 
 	obj->fault_mappable = true;
 unpin:
diff --git a/drivers/gpu/drm/i915/i915_memory.c b/drivers/gpu/drm/i915/i915_memory.c
new file mode 100644
index 000000000000..f684576022f3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memory.c
@@ -0,0 +1,122 @@ 
+#include <linux/mm.h>
+#include <linux/io-mapping.h>
+
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include "i915_drv.h"
+
+struct remap_pfn {		/* cursor shared by the remap_*() walkers below */
+	struct mm_struct *mm;	/* address space whose page tables are filled */
+	unsigned long addr;	/* next user virtual address to populate */
+	unsigned long pfn;	/* page frame number to install at addr */
+	pgprot_t prot;		/* protection bits used for every PTE written */
+};
+
+static inline void remap_pfn(struct remap_pfn *r, pte_t *pte)
+{
+	set_pte_at(r->mm, r->addr, pte,	/* map one page, then step the cursor */
+		   pte_mkspecial(pfn_pte(r->pfn, r->prot))); /* PTE is marked special */
+	r->pfn++;		/* next page frame */
+	r->addr += PAGE_SIZE;	/* next user virtual page */
+}
+
+static inline int remap_pte_range(struct remap_pfn *r, pmd_t *pmd, unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;

+	pte = pte_alloc_map_lock(r->mm, pmd, r->addr, &ptl); /* lock is returned in ptl */
+	if (!pte)
+		return -ENOMEM;

+	arch_enter_lazy_mmu_mode();
+	do	/* write PTEs for [r->addr, end); remap_pfn() advances r->addr */
+		remap_pfn(r, pte++);
+	while (r->addr < end);
+	arch_leave_lazy_mmu_mode();

+	pte_unmap_unlock(pte - 1, ptl); /* pte is one past the last entry written */
+	return 0;
+}
+
+static inline int remap_pmd_range(struct remap_pfn *r, pud_t *pud, unsigned long end)
+{
+	pmd_t *pmd;
+	int err;

+	pmd = pmd_alloc(r->mm, pud, r->addr);
+	if (!pmd)
+		return -ENOMEM;
+	VM_BUG_ON(pmd_trans_huge(*pmd)); /* only the first pmd entry is checked */

+	do	/* one pmd entry per pass; the PTE filler advances r->addr */
+		err = remap_pte_range(r, pmd++, pmd_addr_end(r->addr, end));
+	while (err == 0 && r->addr < end); /* stop at the first error */

+	return err;
+}
+
+static inline int remap_pud_range(struct remap_pfn *r, pgd_t *pgd, unsigned long end)
+{
+	pud_t *pud;
+	int err;

+	pud = pud_alloc(r->mm, pgd, r->addr);
+	if (!pud)
+		return -ENOMEM;

+	do	/* one pud entry per pass; the lower levels advance r->addr */
+		err = remap_pmd_range(r, pud++, pud_addr_end(r->addr, end));
+	while (err == 0 && r->addr < end); /* stop at the first error */

+	return err;
+}
+
+/**
+ * remap_io_mapping - remap an IO mapping to userspace
+ * @vma: user vma to map to; must have VM_IO, VM_PFNMAP, VM_DONTEXPAND and
+ *       VM_DONTDUMP set
+ * @addr: target user address to start at
+ * @pfn: page frame number of the first page to map
+ * @size: size of map area in bytes; rounded up to a whole number of pages
+ * @iomap: the source io_mapping; only its cache attribute bits are used
+ * Returns 0 or a negative errno. Held mm semaphore required when called.
+ */
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap)
+{
+	unsigned long end = addr + PAGE_ALIGN(size);
+	struct remap_pfn r;
+	pgd_t *pgd;
+	int err;

+	if (WARN_ON(addr >= end)) /* empty range, or addr + size wrapped */
+		return -EINVAL;

+#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+	BUG_ON((vma->vm_flags & MUST_SET) != MUST_SET);
+#undef MUST_SET

+	r.mm = vma->vm_mm;
+	r.addr = addr;
+	r.pfn = pfn;
+	r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
+			  (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));

+	pgd = pgd_offset(r.mm, addr);
+	do	/* one pgd entry per pass; the lower levels advance r.addr */
+		err = remap_pud_range(&r, pgd++, pgd_addr_end(r.addr, end));
+	while (err == 0 && r.addr < end);

+	if (err) /* undo the partial mapping: r.addr marks where the walk stopped */
+		zap_vma_ptes(vma, addr, r.addr - addr);

+	return err;
+}
+EXPORT_SYMBOL_GPL(remap_io_mapping);