diff mbox

[4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects

Message ID 1437573109-19211-5-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

ankitprasad.r.sharma@intel.com July 22, 2015, 1:51 p.m. UTC
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

This patch adds support for extending the pread/pwrite functionality
for objects not backed by shmem. The access will be made through
gtt interface.
This will cover prime objects as well as stolen memory backed objects
but for userptr objects it is still forbidden.

v2: Drop locks around slow_user_access, prefault the pages before
access (Chris)

v3: Rebased to the latest drm-intel-nightly (Ankit)

v4: Moved page base & offset calculations outside the copy loop,
corrected data types for size and offset variables, corrected if-else
braces format (Tvrtko/kerneldocs)

Testcase: igt/gem_stolen

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 138 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 121 insertions(+), 17 deletions(-)

Comments

Tvrtko Ursulin July 22, 2015, 3:46 p.m. UTC | #1
Hi,

On 07/22/2015 02:51 PM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch adds support for extending the pread/pwrite functionality
> for objects not backed by shmem. The access will be made through
> gtt interface.
> This will cover prime objects as well as stolen memory backed objects
> but for userptr objects it is still forbidden.
>
> v2: Drop locks around slow_user_access, prefault the pages before
> access (Chris)
>
> v3: Rebased to the latest drm-intel-nightly (Ankit)
>
> v4: Moved page base & offset calculations outside the copy loop,
> corrected data types for size and offset variables, corrected if-else
> braces format (Tvrtko/kerneldocs)
>
> Testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 138 +++++++++++++++++++++++++++++++++++-----
>   1 file changed, 121 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9e7e182..4321178 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -631,6 +631,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
>   	return ret ? - EFAULT : 0;
>   }
>
> +static inline uint64_t
> +slow_user_access(struct io_mapping *mapping,
> +		 uint64_t page_base, int page_offset,
> +		 char __user *user_data,
> +		 int length, bool pwrite)
> +{
> +	void __iomem *vaddr_inatomic;
> +	void *vaddr;
> +	uint64_t unwritten;
> +
> +	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> +	/* We can use the cpu mem copy function because this is X86. */
> +	vaddr = (void __force *)vaddr_inatomic + page_offset;
> +	if (pwrite)
> +		unwritten = __copy_from_user(vaddr, user_data, length);
> +	else
> +		unwritten = __copy_to_user(user_data, vaddr, length);
> +
> +	io_mapping_unmap(vaddr_inatomic);
> +	return unwritten;
> +}
> +
> +static int
> +i915_gem_gtt_pread_pwrite(struct drm_device *dev,
> +			  struct drm_i915_gem_object *obj, uint64_t size,
> +			  uint64_t data_offset, uint64_t data_ptr, bool pwrite)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	char __user *user_data;
> +	uint64_t remain;
> +	uint64_t offset, page_base;
> +	int page_offset, page_length, ret = 0;
> +
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, pwrite);
> +	if (ret)
> +		goto out_unpin;
> +
> +	ret = i915_gem_object_put_fence(obj);
> +	if (ret)
> +		goto out_unpin;
> +
> +	user_data = to_user_ptr(data_ptr);
> +	remain = size;
> +	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> +
> +	if (pwrite)
> +		intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> +
> +	mutex_unlock(&dev->struct_mutex);
> +	if (!pwrite && likely(!i915.prefault_disable))
> +		ret = fault_in_multipages_writeable(user_data, remain);
> +
> +	/*
> +	 * page_offset = offset within page
> +	 * page_base = page offset within aperture
> +	 */
> +	page_offset = offset_in_page(offset);
> +	page_base = offset & PAGE_MASK;
> +
> +	while (remain > 0) {
> +		/* page_length = bytes to copy for this page */
> +		page_length = remain;
> +		if ((page_offset + remain) > PAGE_SIZE)
> +			page_length = PAGE_SIZE - page_offset;
> +
> +		/* This is a slow read/write as it tries to read from
> +		 * and write to user memory which may result into page
> +		 * faults
> +		 */
> +		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> +				       page_offset, user_data,
> +				       page_length, pwrite);
> +
> +		if (ret) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		remain -= page_length;
> +		user_data += page_length;
> +		page_base += page_length;
> +		page_offset = 0;
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);

My objection here was that we are re-acquiring the mutex in 
non-interruptible mode, while the caller had it interruptible. It am not 
100% what the conclusion back then was?

But also, why it is even necessary to drop the mutex here?

Regards,

Tvrtko
Daniel Vetter July 22, 2015, 4:05 p.m. UTC | #2
On Wed, Jul 22, 2015 at 04:46:00PM +0100, Tvrtko Ursulin wrote:
> But also, why it is even necessary to drop the mutex here?

Locking inversion with our own pagefault handler. Happens since at least
mesa can return gtt mmaps to gl clients, which can then in turn try to
upload that with pwrite somewhere else. Also userspace could be just plain
nasty and try to deadlock the kernel ;-)

Dropping the struct_mutex is SOP for the copy_from/to_user slowpath.
-Daniel
Tvrtko Ursulin July 22, 2015, 4:17 p.m. UTC | #3
On 07/22/2015 05:05 PM, Daniel Vetter wrote:
> On Wed, Jul 22, 2015 at 04:46:00PM +0100, Tvrtko Ursulin wrote:
>> But also, why it is even necessary to drop the mutex here?
>
> Locking inversion with our own pagefault handler. Happens since at least
> mesa can return gtt mmaps to gl clients, which can then in turn try to
> upload that with pwrite somewhere else. Also userspace could be just plain
> nasty and try to deadlock the kernel ;-)

Yeah blame userspace. ;D

> Dropping the struct_mutex is SOP for the copy_from/to_user slowpath.

I suspected something like that but did not feel like thinking too much. 
:) When encountering unusual things like dropping and re-acquiring locks 
I expect to see comments explaining it.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9e7e182..4321178 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -631,6 +631,102 @@  shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 	return ret ? - EFAULT : 0;
 }
 
+static inline uint64_t
+slow_user_access(struct io_mapping *mapping,
+		 uint64_t page_base, int page_offset,
+		 char __user *user_data,
+		 int length, bool pwrite)
+{
+	void __iomem *vaddr_inatomic;
+	void *vaddr;
+	uint64_t unwritten;
+
+	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+	/* We can use the cpu mem copy function because this is X86. */
+	vaddr = (void __force *)vaddr_inatomic + page_offset;
+	if (pwrite)
+		unwritten = __copy_from_user(vaddr, user_data, length);
+	else
+		unwritten = __copy_to_user(user_data, vaddr, length);
+
+	io_mapping_unmap(vaddr_inatomic);
+	return unwritten;
+}
+
+static int
+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
+			  struct drm_i915_gem_object *obj, uint64_t size,
+			  uint64_t data_offset, uint64_t data_ptr, bool pwrite)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	char __user *user_data;
+	uint64_t remain;
+	uint64_t offset, page_base;
+	int page_offset, page_length, ret = 0;
+
+	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, pwrite);
+	if (ret)
+		goto out_unpin;
+
+	ret = i915_gem_object_put_fence(obj);
+	if (ret)
+		goto out_unpin;
+
+	user_data = to_user_ptr(data_ptr);
+	remain = size;
+	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+	if (pwrite)
+		intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+
+	mutex_unlock(&dev->struct_mutex);
+	if (!pwrite && likely(!i915.prefault_disable))
+		ret = fault_in_multipages_writeable(user_data, remain);
+
+	/*
+	 * page_offset = offset within page
+	 * page_base = page offset within aperture
+	 */
+	page_offset = offset_in_page(offset);
+	page_base = offset & PAGE_MASK;
+
+	while (remain > 0) {
+		/* page_length = bytes to copy for this page */
+		page_length = remain;
+		if ((page_offset + remain) > PAGE_SIZE)
+			page_length = PAGE_SIZE - page_offset;
+
+		/* This is a slow read/write as it tries to read from
+		 * and write to user memory which may result into page
+		 * faults
+		 */
+		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+				       page_offset, user_data,
+				       page_length, pwrite);
+
+		if (ret) {
+			ret = -EFAULT;
+			break;
+		}
+
+		remain -= page_length;
+		user_data += page_length;
+		page_base += page_length;
+		page_offset = 0;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+	i915_gem_object_ggtt_unpin(obj);
+out:
+	return ret;
+}
+
 static int
 i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_object *obj,
@@ -754,17 +850,20 @@  i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-	ret = i915_gem_shmem_pread(dev, obj, args, file);
+	/* pread for non shmem backed objects */
+	if (!obj->base.filp) {
+		if (obj->tiling_mode == I915_TILING_NONE)
+			ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+							args->offset,
+							args->data_ptr,
+							false);
+		else
+			ret = -EINVAL;
+	} else {
+		ret = i915_gem_shmem_pread(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -1105,17 +1204,22 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
 	ret = -EFAULT;
+
+	/* pwrite for non shmem backed objects */
+	if (!obj->base.filp) {
+		if (obj->tiling_mode == I915_TILING_NONE)
+			ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+							args->offset,
+							args->data_ptr,
+							true);
+		else
+			ret = -EINVAL;
+
+		goto out;
+	}
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
 	 * it would end up going through the fenced access, and we'll get
 	 * different detiling behavior between reading and writing.