diff mbox

[02/16] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages

Message ID 1305235044-9159-3-git-send-email-chris@chris-wilson.co.uk
State New, archived
Headers show

Commit Message

Chris Wilson May 12, 2011, 9:17 p.m. UTC
Replace the three nearly identical copies of the code with a single
function. And take advantage of the opportunity to do some
micro-optimisation: avoid the vmalloc if at all possible and also avoid
dropping the lock unless we are forced to acquire the mm semaphore.

Measuring the impact of the optimisations, it turns out they are not so
micro after all. Running x11perf -aa10text on PineView:

  before 1.28 Mglyph/sec
  after  1.45 Mglyph/sec

(Glyphs, in general and on PineView in particular, are one of the very
few pwrite rate-limited workloads.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
---
 drivers/gpu/drm/i915/i915_gem.c |  149 ++++++++++++++++++++++++---------------
 1 files changed, 92 insertions(+), 57 deletions(-)

Comments

Keith Packard May 13, 2011, 12:21 a.m. UTC | #1
On Thu, 12 May 2011 22:17:10 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> +	pages = kmalloc(n*sizeof(struct page *),
> +			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
> +	if (pages == NULL) {
> +		pages = drm_malloc_ab(n, sizeof(struct page *));
> +		if (pages == NULL) {
> +			*pages_out = NULL;
> +			*num_pages = 0;
> +			return -ENOMEM;
> +		}
> +	}

Please use drm_malloc_ab here unconditionally; you've got a potential
multiplication overflow, and drm_malloc_ab already uses kmalloc for
small amounts anyways.

Otherwise, this looks good to me.
Chris Wilson May 15, 2011, 8 a.m. UTC | #2
On Thu, 12 May 2011 17:21:50 -0700, Keith Packard <keithp@keithp.com> wrote:
> On Thu, 12 May 2011 22:17:10 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > +	pages = kmalloc(n*sizeof(struct page *),
> > +			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
> > +	if (pages == NULL) {
> > +		pages = drm_malloc_ab(n, sizeof(struct page *));
> > +		if (pages == NULL) {
> > +			*pages_out = NULL;
> > +			*num_pages = 0;
> > +			return -ENOMEM;
> > +		}
> > +	}
> 
> Please use drm_malloc_ab here unconditionally;

We're not performing the same trick as drm_malloc_ab() here though, since
this is only used for a temporary allocation we try to consume any
high-order pages, rather than building an array of order-0 pages, knowing
that they will be released shortly afterwards.

> you've got a potential multiplication overflow,

Now this is more serious. Should we not just E2BIG any bo_create that will
require num_pages > MAXINT/sizeof(struct page*)? [1TiB on 32bit]
-Chris
Keith Packard May 15, 2011, 3:36 p.m. UTC | #3
On Sun, 15 May 2011 09:00:51 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> We're not performing the same trick as drm_malloc_ab() here though, since
> this is only used for a temporary allocation we try to consume any
> high-order pages, rather than building an array of order-0 pages, knowing
> that they will be released shortly afterwards.

If you want a 'large temporary allocator', then please feel free to
convince airlied that we need one in drm. For now, please just use the
existing drm function that provides the required interface and which
already has the necessary checks against overflow.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf32527..89b751d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -257,6 +257,73 @@  static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
+/**
+ * Magically retrieves the pages for the user addr whilst holding the
+ * dev->struct_mutex.
+ *
+ * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
+ * due to the pre-existing lock dependency established by i915_gem_fault(),
+ * we have to perform some sleight-of-hand.
+ *
+ * First, we try the lockless variant of get_user_pages(),
+ * __get_user_pages_fast(), whilst continuing to hold the mutex. If that
+ * fails to get all the user pages, then we no have choice but to acquire
+ * the mm semaphore, thus dropping the lock on dev->struct_mutex to do so.
+ * The dev->struct_mutex is then re-acquired before we return.
+ *
+ * Returns: an error code *and* the number of user pages acquired. Even
+ * on an error, you must iterate over the returned pages and release them.
+ */
+static int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out)
+{
+	struct page **pages;
+	int pinned, ret;
+	int n = *num_pages;
+
+	pages = kmalloc(n*sizeof(struct page *),
+			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (pages == NULL) {
+		pages = drm_malloc_ab(n, sizeof(struct page *));
+		if (pages == NULL) {
+			*pages_out = NULL;
+			*num_pages = 0;
+			return -ENOMEM;
+		}
+	}
+
+	pinned = __get_user_pages_fast(addr, n, write, pages);
+	if (pinned < n) {
+		struct mm_struct *mm = current->mm;
+
+		mutex_unlock(&dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm,
+				     addr + (pinned << PAGE_SHIFT),
+				     n - pinned,
+				     write, 0,
+				     pages + pinned,
+				     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+	}
+
+	ret = 0;
+	if (pinned < n)
+		ret = -EFAULT;
+
+	*num_pages = pinned;
+	*pages_out = pages;
+	return ret;
+}
+
+
 static inline void
 slow_shmem_copy(struct page *dst_page,
 		int dst_offset,
@@ -398,11 +465,11 @@  i915_gem_shmem_pread_slow(struct drm_device *dev,
 			  struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t offset;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index, data_page_offset;
 	int page_length;
@@ -420,20 +487,10 @@  i915_gem_shmem_pread_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 1, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, true,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_cpu_read_domain_range(obj,
 							args->offset,
@@ -494,7 +551,7 @@  i915_gem_shmem_pread_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		SetPageDirty(user_pages[i]);
 		mark_page_accessed(user_pages[i]);
 		page_cache_release(user_pages[i]);
@@ -679,10 +736,9 @@  i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t gtt_page_base, offset;
-	loff_t first_data_page, last_data_page, num_pages;
-	loff_t pinned_pages, i;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	struct page **user_pages;
-	struct mm_struct *mm = current->mm;
 	int gtt_page_offset, data_page_offset, data_page_index, page_length;
 	int ret;
 	uint64_t data_ptr = args->data_ptr;
@@ -697,28 +753,18 @@  i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out_unpin_pages;
-	}
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
+		goto out;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	offset = obj->gtt_offset + args->offset;
 
@@ -753,8 +799,8 @@  i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 		data_ptr += page_length;
 	}
 
-out_unpin_pages:
-	for (i = 0; i < pinned_pages; i++)
+out:
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
@@ -803,11 +849,11 @@  i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		vaddr = kmap_atomic(page, KM_USER0);
+		vaddr = kmap_atomic(page);
 		ret = __copy_from_user_inatomic(vaddr + page_offset,
 						user_data,
 						page_length);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
@@ -842,11 +888,10 @@  i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			   struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t first_data_page, last_data_page, offset;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index,  data_page_offset;
 	int page_length;
@@ -864,20 +909,10 @@  i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret)
@@ -940,7 +975,7 @@  i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++)
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);