[1/2] drm/i915: introduce and use i915_gem_object_map_range()

Message ID 1460577654-4370-1-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived

Commit Message

Dave Gordon April 13, 2016, 8 p.m. UTC
From: Alex Dai <yu.dai@intel.com>

The recent pin-and-map unification didn't include the command parser's
own custom vmapping code, which essentially duplicates the same
algorithm but without some of the optimisations.

To unify this further, this patch puts the mapping/unmapping operations
from i915_gem_object_pin_map() and i915_gem_object_put_pages() into
separate functions that can then be shared by the command parser.

With this change, we have only one vmap() call in the whole driver :)

Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 44 +++++---------------
 drivers/gpu/drm/i915/i915_drv.h        | 26 ++++++++++++
 drivers/gpu/drm/i915/i915_gem.c        | 76 +++++++++++++++++++++++-----------
 3 files changed, 87 insertions(+), 59 deletions(-)
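In outline, the helpers this patch introduces pair up as follows (a minimal
sketch of the calling pattern; the caller shown is illustrative, not taken
from the patch itself):

	int ret;
	void *addr;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);		/* gather and pin pages first */

	addr = i915_gem_object_map_range(obj, 0, 0);	/* npages == 0: whole object */
	if (addr != NULL) {
		/* ... access the object through the kernel mapping ... */
		i915_gem_addr_unmap(addr);	/* vunmap() or kunmap() as appropriate */
	}

	i915_gem_object_unpin_pages(obj);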

Comments

Chris Wilson April 13, 2016, 8:14 p.m. UTC | #1
On Wed, Apr 13, 2016 at 09:00:53PM +0100, Dave Gordon wrote:
> From: Alex Dai <yu.dai@intel.com>
> 
> The recent pin-and-map unification didn't include the command parser's
> own custom vmapping code, which essentially duplicates the same
> algorithm but without some of the optimisations.

No. There is no need for extra hacks for the cmdparser when the very
issue is that it takes a vmap every batch.
-Chris
Dave Gordon April 13, 2016, 8:49 p.m. UTC | #2
On 13/04/16 21:14, Chris Wilson wrote:
> On Wed, Apr 13, 2016 at 09:00:53PM +0100, Dave Gordon wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> The recent pin-and-map unification didn't include the command parser's
>> own custom vmapping code, which essentially duplicates the same
>> algorithm but without some of the optimisations.
>
> No. There is no need for extra hacks for the cmdparser when the very
> issue is that it takes a vmap every batch.
> -Chris

Actually, sharing your mapping code will mean that it won't use vmap() 
for "sufficiently small" batches (i.e. one page); it will take the kmap 
path instead. Larger batches will then take advantage of the stack 
optimisation; only the largest won't benefit from that.

Even without the utility of sharing the code with the command parser, 
I'd still be inclined to split pin-and-map into an inner function that 
does the mapping, with whatever clever optimisations we can apply, and 
an outer wrapper that manages the pinning and error recovery.

BTW, I expected to find a kvunmap() which would do exactly what I moved 
into i915_gem_addr_unmap(), but there doesn't seem to be one.

+static inline void i915_gem_addr_unmap(void *mapped_addr)
+{
+	if (is_vmalloc_addr(mapped_addr))
+		vunmap(mapped_addr);
+	else
+		kunmap(kmap_to_page(mapped_addr));
+}

Do you think this would be of sufficient utility to be pushed upstream? 
The analogous kvfree() is quite widely used!

.Dave.
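For comparison, the kvfree() mentioned above (mm/util.c) dispatches between
the vmalloc and slab cases in just the same way:

	void kvfree(const void *addr)
	{
		if (is_vmalloc_addr(addr))
			vfree(addr);
		else
			kfree(addr);
	}

so a kvunmap() along the lines of i915_gem_addr_unmap() would be a natural
counterpart; as noted, no such helper exists in mainline at this point.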

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a337f33..8968f33 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -927,40 +927,16 @@  void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static u32 *vmap_batch(struct drm_i915_gem_object *obj,
+static u32 *map_batch(struct drm_i915_gem_object *obj,
 		       unsigned start, unsigned len)
 {
-	int i;
-	void *addr = NULL;
-	struct sg_page_iter sg_iter;
-	int first_page = start >> PAGE_SHIFT;
-	int last_page = (len + start + 4095) >> PAGE_SHIFT;
-	int npages = last_page - first_page;
-	struct page **pages;
-
-	pages = drm_malloc_ab(npages, sizeof(*pages));
-	if (pages == NULL) {
-		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
-		goto finish;
-	}
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
-		pages[i++] = sg_page_iter_page(&sg_iter);
-		if (i == npages)
-			break;
-	}
+	unsigned long first, npages;
 
-	addr = vmap(pages, i, 0, PAGE_KERNEL);
-	if (addr == NULL) {
-		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
-		goto finish;
-	}
+	/* Convert [start, len) to pages */
+	first = start >> PAGE_SHIFT;
+	npages = DIV_ROUND_UP(start + len, PAGE_SIZE) - first;
 
-finish:
-	if (pages)
-		drm_free_large(pages);
-	return (u32*)addr;
+	return i915_gem_object_map_range(obj, first, npages);
 }
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
@@ -987,7 +963,7 @@  static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		return ERR_PTR(ret);
 	}
 
-	src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
+	src_base = map_batch(src_obj, batch_start_offset, batch_len);
 	if (!src_base) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
 		ret = -ENOMEM;
@@ -1000,7 +976,7 @@  static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		goto unmap_src;
 	}
 
-	dst = vmap_batch(dest_obj, 0, batch_len);
+	dst = map_batch(dest_obj, 0, batch_len);
 	if (!dst) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
 		ret = -ENOMEM;
@@ -1014,7 +990,7 @@  static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 	memcpy(dst, src, batch_len);
 
 unmap_src:
-	vunmap(src_base);
+	i915_gem_addr_unmap(src_base);
 unpin_src:
 	i915_gem_object_unpin_pages(src_obj);
 
@@ -1262,7 +1238,7 @@  int i915_parse_cmds(struct intel_engine_cs *engine,
 		ret = -EINVAL;
 	}
 
-	vunmap(batch_base);
+	i915_gem_addr_unmap(batch_base);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1e6e58..e9cee1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3005,6 +3005,32 @@  static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 }
 
 /**
+ * i915_gem_object_map_range - map some or all of a GEM object into kernel space
+ * @obj: the GEM object to be mapped
+ * @first: offset in pages of the start of the range to be mapped
+ * @npages: length in pages of the range to be mapped. For convenience, a
+ *          length of zero is taken to mean "the remainder of the object"
+ *
+ * Map a given range of a GEM object into kernel virtual space.  The caller must
+ * make sure the associated pages are gathered and pinned before calling this
+ * function, and is responsible for unmapping the returned address when it is no
+ * longer required, by calling i915_gem_addr_unmap().
+ *
+ * Returns the address at which the range has been mapped, or NULL on failure.
+ */
+void *__must_check i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
+					     unsigned long first,
+					     unsigned long npages);
+
+static inline void i915_gem_addr_unmap(void *mapped_addr)
+{
+	if (is_vmalloc_addr(mapped_addr))
+		vunmap(mapped_addr);
+	else
+		kunmap(kmap_to_page(mapped_addr));
+}
+
+/**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj - the object to map into kernel address space
  *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b37ffea..163ca78 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2233,10 +2233,7 @@  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	list_del(&obj->global_list);
 
 	if (obj->mapping) {
-		if (is_vmalloc_addr(obj->mapping))
-			vunmap(obj->mapping);
-		else
-			kunmap(kmap_to_page(obj->mapping));
+		i915_gem_addr_unmap(obj->mapping);
 		obj->mapping = NULL;
 	}
 
@@ -2408,6 +2405,55 @@  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+void *i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
+				unsigned long first,
+				unsigned long npages)
+{
+	unsigned long max_pages = obj->base.size >> PAGE_SHIFT;
+	struct scatterlist *sg = obj->pages->sgl;
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	unsigned long i = 0;
+	void *addr = NULL;
+
+	/* Minimal range check */
+	if (first + npages > max_pages) {
+		DRM_DEBUG_DRIVER("Invalid page range\n");
+		return NULL;
+	}
+
+	/* npages==0 is shorthand for "the rest of the object" */
+	if (npages == 0)
+		npages = max_pages - first;
+
+	/* A single page at the start of the object can simply be kmapped */
+	if (first == 0 && npages == 1)
+		return kmap(sg_page(sg));
+
+	pages = drm_malloc_gfp(npages, sizeof(*pages), GFP_TEMPORARY);
+	if (pages == NULL) {
+		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+		return NULL;
+	}
+
+	for_each_sg_page(sg, &sg_iter, max_pages, first) {
+		pages[i] = sg_page_iter_page(&sg_iter);
+		if (++i == npages) {
+			addr = vmap(pages, npages, 0, PAGE_KERNEL);
+			break;
+		}
+	}
+
+	/* We should have got here via the 'break' above */
+	WARN_ON(i != npages);
+	if (addr == NULL)
+		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
+
+	drm_free_large(pages);
+
+	return addr;
+}
+
 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 {
 	int ret;
@@ -2421,27 +2467,7 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_pin_pages(obj);
 
 	if (obj->mapping == NULL) {
-		struct page **pages;
-
-		pages = NULL;
-		if (obj->base.size == PAGE_SIZE)
-			obj->mapping = kmap(sg_page(obj->pages->sgl));
-		else
-			pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
-					       sizeof(*pages),
-					       GFP_TEMPORARY);
-		if (pages != NULL) {
-			struct sg_page_iter sg_iter;
-			int n;
-
-			n = 0;
-			for_each_sg_page(obj->pages->sgl, &sg_iter,
-					 obj->pages->nents, 0)
-				pages[n++] = sg_page_iter_page(&sg_iter);
-
-			obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
-			drm_free_large(pages);
-		}
+		obj->mapping = i915_gem_object_map_range(obj, 0, 0);
 		if (obj->mapping == NULL) {
 			i915_gem_object_unpin_pages(obj);
 			return ERR_PTR(-ENOMEM);
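
As a sanity check of the page-range arithmetic in map_batch() above, here is
a small standalone sketch (plain userspace C; the start/len values are
illustrative and a 4 KiB page size is assumed):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		/* a batch starting at byte 0x1800, 0x900 bytes long */
		unsigned long start = 0x1800, len = 0x900;

		unsigned long first  = start >> PAGE_SHIFT;	/* = 1 */
		unsigned long npages = DIV_ROUND_UP(start + len, PAGE_SIZE) - first;	/* = 2 */

		/* bytes [0x1800, 0x2100) touch pages 1 and 2, so two pages are mapped */
		printf("first=%lu npages=%lu\n", first, npages);
		return 0;
	}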