[5/6] drm/i915: Support for pread/pwrite from/to non shmem backed objects

Message ID 1449665182-10054-6-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive)
State New, archived

Commit Message

ankitprasad.r.sharma@intel.com Dec. 9, 2015, 12:46 p.m. UTC
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

This patch adds support for extending the pread/pwrite functionality
to objects not backed by shmem. The access will be made through the
gtt interface. This will cover objects backed by stolen memory as well
as other non-shmem backed objects.

v2: Drop locks around slow_user_access, prefault the pages before
access (Chris)

v3: Rebased to the latest drm-intel-nightly (Ankit)

v4: Moved page base & offset calculations outside the copy loop,
corrected data types for size and offset variables, corrected if-else
braces format (Tvrtko/kerneldocs)

v5: Enabled pread/pwrite for all non-shmem backed objects, without
tiling restrictions (Ankit)

v6: Using pwrite_fast for non-shmem backed objects as well (Chris)

v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy,
added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)

v8: Updated v7 commit message, mutex unlock around pwrite slow path for
non-shmem backed objects (Tvrtko)

Testcase: igt/gem_stolen

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 151 +++++++++++++++++++++++++++++++++-------
 1 file changed, 127 insertions(+), 24 deletions(-)
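
For illustration, a minimal userspace sketch of what this enables,
assuming fd is an open i915 DRM fd and handle names a stolen-backed
(filp-less) object created via the new create ioctl earlier in this
series; the pread ioctl and struct below are the existing upstream
interface:

#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Read 'len' bytes back from a non-shmem (e.g. stolen) object.
 * Before this patch the kernel rejected such objects with -EINVAL;
 * with it, the copy is serviced through a GTT mapping. */
static int gem_read(int fd, uint32_t handle, void *dst, uint64_t len)
{
	struct drm_i915_gem_pread pread;

	memset(&pread, 0, sizeof(pread));
	pread.handle = handle;
	pread.offset = 0;			/* byte offset into the object */
	pread.size = len;			/* bytes to copy */
	pread.data_ptr = (uintptr_t)dst;	/* user pointer to copy into */

	return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}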

Comments

Tvrtko Ursulin Dec. 9, 2015, 4:15 p.m. UTC | #1
Hi,

On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch adds support for extending the pread/pwrite functionality
> for objects not backed by shmem. The access will be made through
> gtt interface. This will cover objects backed by stolen memory as well
> as other non-shmem backed objects.
>
> v2: Drop locks around slow_user_access, prefault the pages before
> access (Chris)
>
> v3: Rebased to the latest drm-intel-nightly (Ankit)
>
> v4: Moved page base & offset calculations outside the copy loop,
> corrected data types for size and offset variables, corrected if-else
> braces format (Tvrtko/kerneldocs)
>
> v5: Enabled pread/pwrite for all non-shmem backed objects including
> without tiling restrictions (Ankit)
>
> v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
>
> v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy,
> added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
>
> v8: Updated v7 commit message, mutex unlock around pwrite slow path for
> non-shmem backed objects (Tvrtko)
>
> Testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 151 +++++++++++++++++++++++++++++++++-------
>   1 file changed, 127 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ed97de6..68ed67a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
>   	return ret ? - EFAULT : 0;
>   }
>
> +static inline uint64_t
> +slow_user_access(struct io_mapping *mapping,
> +		 uint64_t page_base, int page_offset,
> +		 char __user *user_data,
> +		 int length, bool pwrite)
> +{
> +	void __iomem *vaddr_inatomic;
> +	void *vaddr;
> +	uint64_t unwritten;
> +
> +	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> +	/* We can use the cpu mem copy function because this is X86. */
> +	vaddr = (void __force *)vaddr_inatomic + page_offset;
> +	if (pwrite)
> +		unwritten = __copy_from_user(vaddr, user_data, length);
> +	else
> +		unwritten = __copy_to_user(user_data, vaddr, length);
> +
> +	io_mapping_unmap(vaddr_inatomic);
> +	return unwritten;
> +}
> +
> +static int
> +i915_gem_gtt_copy(struct drm_device *dev,
> +		   struct drm_i915_gem_object *obj, uint64_t size,
> +		   uint64_t data_offset, uint64_t data_ptr)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	char __user *user_data;
> +	uint64_t remain;
> +	uint64_t offset, page_base;
> +	int page_offset, page_length, ret = 0;
> +
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> +	if (ret)
> +		goto out_unpin;
> +
> +	ret = i915_gem_object_put_fence(obj);
> +	if (ret)
> +		goto out_unpin;
> +
> +	user_data = to_user_ptr(data_ptr);
> +	remain = size;
> +	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> +
> +	mutex_unlock(&dev->struct_mutex);
> +	if (likely(!i915.prefault_disable))
> +		ret = fault_in_multipages_writeable(user_data, remain);
> +
> +	/*
> +	 * page_offset = offset within page
> +	 * page_base = page offset within aperture
> +	 */
> +	page_offset = offset_in_page(offset);
> +	page_base = offset & PAGE_MASK;
> +
> +	while (remain > 0) {
> +		/* page_length = bytes to copy for this page */
> +		page_length = remain;
> +		if ((page_offset + remain) > PAGE_SIZE)
> +			page_length = PAGE_SIZE - page_offset;
> +
> +		/* This is a slow read/write as it tries to read from
> +		 * and write to user memory which may result into page
> +		 * faults
> +		 */
> +		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> +				       page_offset, user_data,
> +				       page_length, false);
> +
> +		if (ret) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		remain -= page_length;
> +		user_data += page_length;
> +		page_base += page_length;
> +		page_offset = 0;
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);
> +
> +out_unpin:
> +	i915_gem_object_ggtt_unpin(obj);
> +out:
> +	return ret;
> +}
> +
>   static int
>   i915_gem_shmem_pread(struct drm_device *dev,
>   		     struct drm_i915_gem_object *obj,
> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>   		goto out;
>   	}
>
> -	/* prime objects have no backing filp to GEM pread/pwrite
> -	 * pages from.
> -	 */
> -	if (!obj->base.filp) {
> -		ret = -EINVAL;
> -		goto out;
> -	}
> -
>   	trace_i915_gem_object_pread(obj, args->offset, args->size);
>
> -	ret = i915_gem_shmem_pread(dev, obj, args, file);
> +	/* pread for non shmem backed objects */
> +	if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> +		ret = i915_gem_gtt_copy(dev, obj, args->size,
> +					args->offset, args->data_ptr);
> +	else
> +		ret = i915_gem_shmem_pread(dev, obj, args, file);

Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed 
objects if tiling is set. Sounds wrong to me unless I am missing something?
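
For illustration, a minimal sketch of the stricter dispatch being
suggested (untested; rejecting the tiled, filp-less case with -EINVAL
is an assumption):

	if (!obj->base.filp) {
		/* no path can service a tiled object without shmem pages */
		if (obj->tiling_mode != I915_TILING_NONE) {
			ret = -EINVAL;	/* assumed error code */
			goto out;
		}
		ret = i915_gem_gtt_copy(dev, obj, args->size,
					args->offset, args->data_ptr);
	} else {
		ret = i915_gem_shmem_pread(dev, obj, args, file);
	}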

>
>   out:
>   	drm_gem_object_unreference(&obj->base);
> @@ -789,10 +879,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   			 struct drm_i915_gem_pwrite *args,
>   			 struct drm_file *file)
>   {
> +	struct drm_device *dev = obj->base.dev;
>   	struct drm_mm_node node;
>   	uint64_t remain, offset;
>   	char __user *user_data;
>   	int ret;
> +	bool faulted = false;
>
>   	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
>   	if (ret) {
> @@ -851,11 +943,29 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   		/* If we get a fault while copying data, then (presumably) our
>   		 * source page isn't available.  Return the error and we'll
>   		 * retry in the slow path.
> +		 * If the object is non-shmem backed, we retry again with the
> +		 * path that handles page fault.
>   		 */
> -		if (fast_user_write(i915->gtt.mappable, page_base,
> -				    page_offset, user_data, page_length)) {
> -			ret = -EFAULT;
> -			goto out_flush;
> +		if (faulted || fast_user_write(i915->gtt.mappable,
> +						page_base, page_offset,
> +						user_data, page_length)) {
> +			if (!obj->base.filp) {
> +				faulted = true;
> +				mutex_unlock(&dev->struct_mutex);
> +				if (slow_user_access(i915->gtt.mappable,
> +						     page_base,
> +						     page_offset, user_data,
> +						     page_length, true)) {
> +					ret = -EFAULT;
> +					mutex_lock(&dev->struct_mutex);
> +					goto out_flush;
> +				}
> +
> +				mutex_lock(&dev->struct_mutex);
> +			} else {
> +				ret = -EFAULT;
> +				goto out_flush;
> +			}
>   		}

Some questions:

1. What is the advantage of doing the slow access for non-shmem backed 
objects inside a single loop, as opposed to extracting it in a separate 
function?

For example i915_gem_gtt_pwrite_slow ? Then it could have been called 
from i915_gem_pwrite_ioctl depending on the master if statement there, 
fallback etc.

I think it would be clearer, unless there is a special reason it makes 
sense to go with the fast path first and then switch to the slow path at 
the point the first fault is hit.

2. I have noticed the shmem pwrite slowpath makes explicit mention of 
potential changes to the object domain while the lock was dropped and 
takes care of flushing the cache in that case.

Is this something this path should do as well, or if not why not?
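
For illustration of question 1, a rough sketch of the extracted
fallback (i915_gem_gtt_pwrite_slow is hypothetical; it does not exist
in this patch, and its signature is assumed to mirror the fast path):

	/* In i915_gem_pwrite_ioctl: try the fast GTT path first and fall
	 * back to a dedicated slow path for filp-less objects, instead of
	 * switching paths inside the fast path's copy loop. */
	ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
	if (ret == -EFAULT && !obj->base.filp)
		/* hypothetical helper: the same copy loop, built around
		 * slow_user_access() with struct_mutex dropped around the
		 * user copies */
		ret = i915_gem_gtt_pwrite_slow(dev_priv, obj, args, file);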


>   		remain -= page_length;
> @@ -1121,14 +1231,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>   		goto out;
>   	}
>
> -	/* prime objects have no backing filp to GEM pread/pwrite
> -	 * pages from.
> -	 */
> -	if (!obj->base.filp) {
> -		ret = -EINVAL;
> -		goto out;
> -	}
> -
>   	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>
>   	ret = -EFAULT;
> @@ -1139,8 +1241,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>   	 * perspective, requiring manual detiling by the client.
>   	 */
>   	if (obj->tiling_mode == I915_TILING_NONE &&
> -	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> -	    cpu_write_needs_clflush(obj)) {
> +	    (!obj->base.filp ||
> +	    (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> +	    cpu_write_needs_clflush(obj)))) {
>   		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
>   		/* Note that the gtt paths might fail with non-page-backed user
>   		 * pointers (e.g. gtt mappings when moving data between
> @@ -1150,7 +1253,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>   	if (ret == -EFAULT || ret == -ENOSPC) {
>   		if (obj->phys_handle)
>   			ret = i915_gem_phys_pwrite(obj, args, file);
> -		else
> +		else if (obj->base.filp)
>   			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
>   	}
>
>

Regards,

Tvrtko
Dave Gordon Dec. 9, 2015, 7:39 p.m. UTC | #2
On 09/12/15 16:15, Tvrtko Ursulin wrote:
>
> Hi,
>
> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>
>> This patch adds support for extending the pread/pwrite functionality
>> for objects not backed by shmem. The access will be made through
>> gtt interface. This will cover objects backed by stolen memory as well
>> as other non-shmem backed objects.
>>
>> v2: Drop locks around slow_user_access, prefault the pages before
>> access (Chris)
>>
>> v3: Rebased to the latest drm-intel-nightly (Ankit)
>>
>> v4: Moved page base & offset calculations outside the copy loop,
>> corrected data types for size and offset variables, corrected if-else
>> braces format (Tvrtko/kerneldocs)
>>
>> v5: Enabled pread/pwrite for all non-shmem backed objects including
>> without tiling restrictions (Ankit)
>>
>> v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
>>
>> v7: Updated commit message, Renamed i915_gem_gtt_read to
>> i915_gem_gtt_copy,
>> added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
>>
>> v8: Updated v7 commit message, mutex unlock around pwrite slow path for
>> non-shmem backed objects (Tvrtko)
>>
>> Testcase: igt/gem_stolen
>>
>> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c | 151
>> +++++++++++++++++++++++++++++++++-------
>>   1 file changed, 127 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index ed97de6..68ed67a 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int
>> shmem_page_offset, int page_length,
>>       return ret ? - EFAULT : 0;
>>   }
>>
>> +static inline uint64_t
>> +slow_user_access(struct io_mapping *mapping,
>> +         uint64_t page_base, int page_offset,
>> +         char __user *user_data,
>> +         int length, bool pwrite)
>> +{
>> +    void __iomem *vaddr_inatomic;
>> +    void *vaddr;
>> +    uint64_t unwritten;
>> +
>> +    vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
>> +    /* We can use the cpu mem copy function because this is X86. */
>> +    vaddr = (void __force *)vaddr_inatomic + page_offset;
>> +    if (pwrite)
>> +        unwritten = __copy_from_user(vaddr, user_data, length);
>> +    else
>> +        unwritten = __copy_to_user(user_data, vaddr, length);
>> +
>> +    io_mapping_unmap(vaddr_inatomic);
>> +    return unwritten;
>> +}
>> +
>> +static int
>> +i915_gem_gtt_copy(struct drm_device *dev,
>> +           struct drm_i915_gem_object *obj, uint64_t size,
>> +           uint64_t data_offset, uint64_t data_ptr)
>> +{
>> +    struct drm_i915_private *dev_priv = dev->dev_private;
>> +    char __user *user_data;
>> +    uint64_t remain;
>> +    uint64_t offset, page_base;
>> +    int page_offset, page_length, ret = 0;
>> +
>> +    ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
>> +    if (ret)
>> +        goto out;
>> +
>> +    ret = i915_gem_object_set_to_gtt_domain(obj, false);
>> +    if (ret)
>> +        goto out_unpin;
>> +
>> +    ret = i915_gem_object_put_fence(obj);
>> +    if (ret)
>> +        goto out_unpin;
>> +
>> +    user_data = to_user_ptr(data_ptr);
>> +    remain = size;
>> +    offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
>> +
>> +    mutex_unlock(&dev->struct_mutex);
>> +    if (likely(!i915.prefault_disable))
>> +        ret = fault_in_multipages_writeable(user_data, remain);
>> +
>> +    /*
>> +     * page_offset = offset within page
>> +     * page_base = page offset within aperture
>> +     */
>> +    page_offset = offset_in_page(offset);
>> +    page_base = offset & PAGE_MASK;
>> +
>> +    while (remain > 0) {
>> +        /* page_length = bytes to copy for this page */
>> +        page_length = remain;
>> +        if ((page_offset + remain) > PAGE_SIZE)
>> +            page_length = PAGE_SIZE - page_offset;
>> +
>> +        /* This is a slow read/write as it tries to read from
>> +         * and write to user memory which may result into page
>> +         * faults
>> +         */
>> +        ret = slow_user_access(dev_priv->gtt.mappable, page_base,
>> +                       page_offset, user_data,
>> +                       page_length, false);
>> +
>> +        if (ret) {
>> +            ret = -EFAULT;
>> +            break;
>> +        }
>> +
>> +        remain -= page_length;
>> +        user_data += page_length;
>> +        page_base += page_length;
>> +        page_offset = 0;
>> +    }
>> +
>> +    mutex_lock(&dev->struct_mutex);
>> +
>> +out_unpin:
>> +    i915_gem_object_ggtt_unpin(obj);
>> +out:
>> +    return ret;
>> +}
>> +
>>   static int
>>   i915_gem_shmem_pread(struct drm_device *dev,
>>                struct drm_i915_gem_object *obj,
>> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
>> void *data,
>>           goto out;
>>       }
>>
>> -    /* prime objects have no backing filp to GEM pread/pwrite
>> -     * pages from.
>> -     */
>> -    if (!obj->base.filp) {
>> -        ret = -EINVAL;
>> -        goto out;
>> -    }
>> -
>>       trace_i915_gem_object_pread(obj, args->offset, args->size);
>>
>> -    ret = i915_gem_shmem_pread(dev, obj, args, file);
>> +    /* pread for non shmem backed objects */
>> +    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
>> +        ret = i915_gem_gtt_copy(dev, obj, args->size,
>> +                    args->offset, args->data_ptr);
>> +    else
>> +        ret = i915_gem_shmem_pread(dev, obj, args, file);
>
> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
> objects if tiling is set. Sounds wrong to me unless I am missing something?

Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type 
objects? What about (phys, stolen, userptr, dmabuf, ...?) Which of these 
is the alternate path going to work with?

.Dave.
ankitprasad.r.sharma@intel.com Dec. 10, 2015, 10:54 a.m. UTC | #3
On Wed, 2015-12-09 at 16:15 +0000, Tvrtko Ursulin wrote:
> Hi,
> 
> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > This patch adds support for extending the pread/pwrite functionality
> > for objects not backed by shmem. The access will be made through
> > gtt interface. This will cover objects backed by stolen memory as well
> > as other non-shmem backed objects.
> >
> > v2: Drop locks around slow_user_access, prefault the pages before
> > access (Chris)
> >
> > v3: Rebased to the latest drm-intel-nightly (Ankit)
> >
> > v4: Moved page base & offset calculations outside the copy loop,
> > corrected data types for size and offset variables, corrected if-else
> > braces format (Tvrtko/kerneldocs)
> >
> > v5: Enabled pread/pwrite for all non-shmem backed objects including
> > without tiling restrictions (Ankit)
> >
> > v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
> >
> > v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy,
> > added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
> >
> > v8: Updated v7 commit message, mutex unlock around pwrite slow path for
> > non-shmem backed objects (Tvrtko)
> >
> > Testcase: igt/gem_stolen
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 151 +++++++++++++++++++++++++++++++++-------
> >   1 file changed, 127 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ed97de6..68ed67a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
> >   	return ret ? - EFAULT : 0;
> >   }
> >
> > +static inline uint64_t
> > +slow_user_access(struct io_mapping *mapping,
> > +		 uint64_t page_base, int page_offset,
> > +		 char __user *user_data,
> > +		 int length, bool pwrite)
> > +{
> > +	void __iomem *vaddr_inatomic;
> > +	void *vaddr;
> > +	uint64_t unwritten;
> > +
> > +	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> > +	/* We can use the cpu mem copy function because this is X86. */
> > +	vaddr = (void __force *)vaddr_inatomic + page_offset;
> > +	if (pwrite)
> > +		unwritten = __copy_from_user(vaddr, user_data, length);
> > +	else
> > +		unwritten = __copy_to_user(user_data, vaddr, length);
> > +
> > +	io_mapping_unmap(vaddr_inatomic);
> > +	return unwritten;
> > +}
> > +
> > +static int
> > +i915_gem_gtt_copy(struct drm_device *dev,
> > +		   struct drm_i915_gem_object *obj, uint64_t size,
> > +		   uint64_t data_offset, uint64_t data_ptr)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	char __user *user_data;
> > +	uint64_t remain;
> > +	uint64_t offset, page_base;
> > +	int page_offset, page_length, ret = 0;
> > +
> > +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> > +	if (ret)
> > +		goto out;
> > +
> > +	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> > +	if (ret)
> > +		goto out_unpin;
> > +
> > +	ret = i915_gem_object_put_fence(obj);
> > +	if (ret)
> > +		goto out_unpin;
> > +
> > +	user_data = to_user_ptr(data_ptr);
> > +	remain = size;
> > +	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> > +
> > +	mutex_unlock(&dev->struct_mutex);
> > +	if (likely(!i915.prefault_disable))
> > +		ret = fault_in_multipages_writeable(user_data, remain);
> > +
> > +	/*
> > +	 * page_offset = offset within page
> > +	 * page_base = page offset within aperture
> > +	 */
> > +	page_offset = offset_in_page(offset);
> > +	page_base = offset & PAGE_MASK;
> > +
> > +	while (remain > 0) {
> > +		/* page_length = bytes to copy for this page */
> > +		page_length = remain;
> > +		if ((page_offset + remain) > PAGE_SIZE)
> > +			page_length = PAGE_SIZE - page_offset;
> > +
> > +		/* This is a slow read/write as it tries to read from
> > +		 * and write to user memory which may result into page
> > +		 * faults
> > +		 */
> > +		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> > +				       page_offset, user_data,
> > +				       page_length, false);
> > +
> > +		if (ret) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		remain -= page_length;
> > +		user_data += page_length;
> > +		page_base += page_length;
> > +		page_offset = 0;
> > +	}
> > +
> > +	mutex_lock(&dev->struct_mutex);
> > +
> > +out_unpin:
> > +	i915_gem_object_ggtt_unpin(obj);
> > +out:
> > +	return ret;
> > +}
> > +
> >   static int
> >   i915_gem_shmem_pread(struct drm_device *dev,
> >   		     struct drm_i915_gem_object *obj,
> > @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
> >   		goto out;
> >   	}
> >
> > -	/* prime objects have no backing filp to GEM pread/pwrite
> > -	 * pages from.
> > -	 */
> > -	if (!obj->base.filp) {
> > -		ret = -EINVAL;
> > -		goto out;
> > -	}
> > -
> >   	trace_i915_gem_object_pread(obj, args->offset, args->size);
> >
> > -	ret = i915_gem_shmem_pread(dev, obj, args, file);
> > +	/* pread for non shmem backed objects */
> > +	if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> > +		ret = i915_gem_gtt_copy(dev, obj, args->size,
> > +					args->offset, args->data_ptr);
> > +	else
> > +		ret = i915_gem_shmem_pread(dev, obj, args, file);
> 
> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed 
> objects if tiling is set. Sounds wrong to me unless I am missing something?
> 
Thanks for pointing it out; I need to add a check there.
> >
> >   out:
> >   	drm_gem_object_unreference(&obj->base);
> > @@ -789,10 +879,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >   			 struct drm_i915_gem_pwrite *args,
> >   			 struct drm_file *file)
> >   {
> > +	struct drm_device *dev = obj->base.dev;
> >   	struct drm_mm_node node;
> >   	uint64_t remain, offset;
> >   	char __user *user_data;
> >   	int ret;
> > +	bool faulted = false;
> >
> >   	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> >   	if (ret) {
> > @@ -851,11 +943,29 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >   		/* If we get a fault while copying data, then (presumably) our
> >   		 * source page isn't available.  Return the error and we'll
> >   		 * retry in the slow path.
> > +		 * If the object is non-shmem backed, we retry again with the
> > +		 * path that handles page fault.
> >   		 */
> > -		if (fast_user_write(i915->gtt.mappable, page_base,
> > -				    page_offset, user_data, page_length)) {
> > -			ret = -EFAULT;
> > -			goto out_flush;
> > +		if (faulted || fast_user_write(i915->gtt.mappable,
> > +						page_base, page_offset,
> > +						user_data, page_length)) {
> > +			if (!obj->base.filp) {
> > +				faulted = true;
> > +				mutex_unlock(&dev->struct_mutex);
> > +				if (slow_user_access(i915->gtt.mappable,
> > +						     page_base,
> > +						     page_offset, user_data,
> > +						     page_length, true)) {
> > +					ret = -EFAULT;
> > +					mutex_lock(&dev->struct_mutex);
> > +					goto out_flush;
> > +				}
> > +
> > +				mutex_lock(&dev->struct_mutex);
> > +			} else {
> > +				ret = -EFAULT;
> > +				goto out_flush;
> > +			}
> >   		}
> 
> Some questions:
> 
> 1. What is the advantage of doing the slow access for non-shmem backed 
> objects inside a single loop, as opposed to extracting it in a separate 
> function?
> 
> For example i915_gem_gtt_pwrite_slow ? Then it could have been called 
> from i915_gem_pwrite_ioctl depending on the master if statement there, 
> fallback etc.
> 
> I think it would be clearer unless there is a special reason it makes 
> sense to go with the fast path first and then switch to slow path at the 
> point first fault is hit.
I am open to either approach, but Chris suggested extending
pwrite_fast as it is already being used for faster pwrites.

Chris,
Would it be better to do a pwrite to non-shmem backed objects via a
separate function?

> 
> 2. I have noticed the shmem pwrite slowpath makes explicit mention of 
> potential changes to the object domain while the lock was dropped and 
> takes care of flushing the cache in that case.
> 
> Is this something this path should do as well, or if not why not?
I do not think that this path needs to take care of flushing the cache:
stolen-backed objects are not accessible to the CPU, so there is no
possibility of the data being in the CPU cache, at least for the
stolen-backed objects.
For other non-shmem backed objects (dmabuf, userptr, phys), we may need
some input from Chris on how to handle it.
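
For reference, the fixup in question is this block in
i915_gem_shmem_pwrite (abridged from the current code):

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}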

> 
> 
> >   		remain -= page_length;
> > @@ -1121,14 +1231,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >   		goto out;
> >   	}
> >
> > -	/* prime objects have no backing filp to GEM pread/pwrite
> > -	 * pages from.
> > -	 */
> > -	if (!obj->base.filp) {
> > -		ret = -EINVAL;
> > -		goto out;
> > -	}
> > -
> >   	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
> >
> >   	ret = -EFAULT;
> > @@ -1139,8 +1241,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >   	 * perspective, requiring manual detiling by the client.
> >   	 */
> >   	if (obj->tiling_mode == I915_TILING_NONE &&
> > -	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> > -	    cpu_write_needs_clflush(obj)) {
> > +	    (!obj->base.filp ||
> > +	    (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> > +	    cpu_write_needs_clflush(obj)))) {
> >   		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> >   		/* Note that the gtt paths might fail with non-page-backed user
> >   		 * pointers (e.g. gtt mappings when moving data between
> > @@ -1150,7 +1253,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >   	if (ret == -EFAULT || ret == -ENOSPC) {
> >   		if (obj->phys_handle)
> >   			ret = i915_gem_phys_pwrite(obj, args, file);
> > -		else
> > +		else if (obj->base.filp)
> >   			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> >   	}
> >
> >
> 
> Regards,
> 
> Tvrtko
>
ankitprasad.r.sharma@intel.com Dec. 10, 2015, 11 a.m. UTC | #4
On Thu, 2015-12-10 at 16:24 +0530, Ankitprasad Sharma wrote:
Missed Chris in the last mail, adding him.
On Wed, 2015-12-09 at 16:15 +0000, Tvrtko Ursulin wrote:
> Hi,
> 
> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > This patch adds support for extending the pread/pwrite functionality
> > for objects not backed by shmem. The access will be made through
> > gtt interface. This will cover objects backed by stolen memory as well
> > as other non-shmem backed objects.
> >
> > v2: Drop locks around slow_user_access, prefault the pages before
> > access (Chris)
> >
> > v3: Rebased to the latest drm-intel-nightly (Ankit)
> >
> > v4: Moved page base & offset calculations outside the copy loop,
> > corrected data types for size and offset variables, corrected if-else
> > braces format (Tvrtko/kerneldocs)
> >
> > v5: Enabled pread/pwrite for all non-shmem backed objects including
> > without tiling restrictions (Ankit)
> >
> > v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
> >
> > v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy,
> > added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
> >
> > v8: Updated v7 commit message, mutex unlock around pwrite slow path for
> > non-shmem backed objects (Tvrtko)
> >
> > Testcase: igt/gem_stolen
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 151 +++++++++++++++++++++++++++++++++-------
> >   1 file changed, 127 insertions(+), 24 deletions(-)
> >

> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ed97de6..68ed67a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
> >   	return ret ? - EFAULT : 0;
> >   }
> >
> > +static inline uint64_t
> > +slow_user_access(struct io_mapping *mapping,
> > +		 uint64_t page_base, int page_offset,
> > +		 char __user *user_data,
> > +		 int length, bool pwrite)
> > +{
> > +	void __iomem *vaddr_inatomic;
> > +	void *vaddr;
> > +	uint64_t unwritten;
> > +
> > +	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> > +	/* We can use the cpu mem copy function because this is X86. */
> > +	vaddr = (void __force *)vaddr_inatomic + page_offset;
> > +	if (pwrite)
> > +		unwritten = __copy_from_user(vaddr, user_data, length);
> > +	else
> > +		unwritten = __copy_to_user(user_data, vaddr, length);
> > +
> > +	io_mapping_unmap(vaddr_inatomic);
> > +	return unwritten;
> > +}
> > +
> > +static int
> > +i915_gem_gtt_copy(struct drm_device *dev,
> > +		   struct drm_i915_gem_object *obj, uint64_t size,
> > +		   uint64_t data_offset, uint64_t data_ptr)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	char __user *user_data;
> > +	uint64_t remain;
> > +	uint64_t offset, page_base;
> > +	int page_offset, page_length, ret = 0;
> > +
> > +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> > +	if (ret)
> > +		goto out;
> > +
> > +	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> > +	if (ret)
> > +		goto out_unpin;
> > +
> > +	ret = i915_gem_object_put_fence(obj);
> > +	if (ret)
> > +		goto out_unpin;
> > +
> > +	user_data = to_user_ptr(data_ptr);
> > +	remain = size;
> > +	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> > +
> > +	mutex_unlock(&dev->struct_mutex);
> > +	if (likely(!i915.prefault_disable))
> > +		ret = fault_in_multipages_writeable(user_data, remain);
> > +
> > +	/*
> > +	 * page_offset = offset within page
> > +	 * page_base = page offset within aperture
> > +	 */
> > +	page_offset = offset_in_page(offset);
> > +	page_base = offset & PAGE_MASK;
> > +
> > +	while (remain > 0) {
> > +		/* page_length = bytes to copy for this page */
> > +		page_length = remain;
> > +		if ((page_offset + remain) > PAGE_SIZE)
> > +			page_length = PAGE_SIZE - page_offset;
> > +
> > +		/* This is a slow read/write as it tries to read from
> > +		 * and write to user memory which may result into page
> > +		 * faults
> > +		 */
> > +		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> > +				       page_offset, user_data,
> > +				       page_length, false);
> > +
> > +		if (ret) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		remain -= page_length;
> > +		user_data += page_length;
> > +		page_base += page_length;
> > +		page_offset = 0;
> > +	}
> > +
> > +	mutex_lock(&dev->struct_mutex);
> > +
> > +out_unpin:
> > +	i915_gem_object_ggtt_unpin(obj);
> > +out:
> > +	return ret;
> > +}
> > +
> >   static int
> >   i915_gem_shmem_pread(struct drm_device *dev,
> >   		     struct drm_i915_gem_object *obj,
> > @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
> >   		goto out;
> >   	}
> >
> > -	/* prime objects have no backing filp to GEM pread/pwrite
> > -	 * pages from.
> > -	 */
> > -	if (!obj->base.filp) {
> > -		ret = -EINVAL;
> > -		goto out;
> > -	}
> > -
> >   	trace_i915_gem_object_pread(obj, args->offset, args->size);
> >
> > -	ret = i915_gem_shmem_pread(dev, obj, args, file);
> > +	/* pread for non shmem backed objects */
> > +	if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> > +		ret = i915_gem_gtt_copy(dev, obj, args->size,
> > +					args->offset, args->data_ptr);
> > +	else
> > +		ret = i915_gem_shmem_pread(dev, obj, args, file);
> 
> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed 
> objects if tiling is set. Sounds wrong to me unless I am missing something?
> 
Thanks for pointing it out; I need to add a check there.
> >
> >   out:
> >   	drm_gem_object_unreference(&obj->base);
> > @@ -789,10 +879,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >   			 struct drm_i915_gem_pwrite *args,
> >   			 struct drm_file *file)
> >   {
> > +	struct drm_device *dev = obj->base.dev;
> >   	struct drm_mm_node node;
> >   	uint64_t remain, offset;
> >   	char __user *user_data;
> >   	int ret;
> > +	bool faulted = false;
> >
> >   	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> >   	if (ret) {
> > @@ -851,11 +943,29 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >   		/* If we get a fault while copying data, then (presumably) our
> >   		 * source page isn't available.  Return the error and we'll
> >   		 * retry in the slow path.
> > +		 * If the object is non-shmem backed, we retry again with the
> > +		 * path that handles page fault.
> >   		 */
> > -		if (fast_user_write(i915->gtt.mappable, page_base,
> > -				    page_offset, user_data, page_length)) {
> > -			ret = -EFAULT;
> > -			goto out_flush;
> > +		if (faulted || fast_user_write(i915->gtt.mappable,
> > +						page_base, page_offset,
> > +						user_data, page_length)) {
> > +			if (!obj->base.filp) {
> > +				faulted = true;
> > +				mutex_unlock(&dev->struct_mutex);
> > +				if (slow_user_access(i915->gtt.mappable,
> > +						     page_base,
> > +						     page_offset, user_data,
> > +						     page_length, true)) {
> > +					ret = -EFAULT;
> > +					mutex_lock(&dev->struct_mutex);
> > +					goto out_flush;
> > +				}
> > +
> > +				mutex_lock(&dev->struct_mutex);
> > +			} else {
> > +				ret = -EFAULT;
> > +				goto out_flush;
> > +			}
> >   		}
> 
> Some questions:
> 
> 1. What is the advantage of doing the slow access for non-shmem backed 
> objects inside a single loop, as opposed to extracting it in a separate 
> function?
> 
> For example i915_gem_gtt_pwrite_slow ? Then it could have been called 
> from i915_gem_pwrite_ioctl depending on the master if statement there, 
> fallback etc.
> 
> I think it would be clearer unless there is a special reason it makes 
> sense to go with the fast path first and then switch to slow path at the 
> point first fault is hit.
I am open to either approach, but Chris suggested extending
pwrite_fast as it is already being used for faster pwrites.

Chris,
Would it be better to do a pwrite to non-shmem backed objects via a
separate function?

> 
> 2. I have noticed the shmem pwrite slowpath makes explicit mention of 
> potential changes to the object domain while the lock was dropped and 
> takes care of flushing the cache in that case.
> 
> Is this something this path should do as well, or if not why not?
I do not think that this path needs to take care of flushing the cache:
stolen-backed objects are not accessible to the CPU, so there is no
possibility of the data being in the CPU cache, at least for the
stolen-backed objects.
For other non-shmem backed objects (dmabuf, userptr, phys), we may need
some input from Chris on how to handle it.

Thanks, Ankit
ankitprasad.r.sharma@intel.com Dec. 10, 2015, 11:12 a.m. UTC | #5
On Wed, 2015-12-09 at 19:39 +0000, Dave Gordon wrote:
> On 09/12/15 16:15, Tvrtko Ursulin wrote:
> >
> > Hi,
> >
> > On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
> >> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>
> >> This patch adds support for extending the pread/pwrite functionality
> >> for objects not backed by shmem. The access will be made through
> >> gtt interface. This will cover objects backed by stolen memory as well
> >> as other non-shmem backed objects.
> >>
> >> v2: Drop locks around slow_user_access, prefault the pages before
> >> access (Chris)
> >>
> >> v3: Rebased to the latest drm-intel-nightly (Ankit)
> >>
> >> v4: Moved page base & offset calculations outside the copy loop,
> >> corrected data types for size and offset variables, corrected if-else
> >> braces format (Tvrtko/kerneldocs)
> >>
> >> v5: Enabled pread/pwrite for all non-shmem backed objects including
> >> without tiling restrictions (Ankit)
> >>
> >> v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
> >>
> >> v7: Updated commit message, Renamed i915_gem_gtt_read to
> >> i915_gem_gtt_copy,
> >> added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
> >>
> >> v8: Updated v7 commit message, mutex unlock around pwrite slow path for
> >> non-shmem backed objects (Tvrtko)
> >>
> >> Testcase: igt/gem_stolen
> >>
> >> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_gem.c | 151
> >> +++++++++++++++++++++++++++++++++-------
> >>   1 file changed, 127 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> >> b/drivers/gpu/drm/i915/i915_gem.c
> >> index ed97de6..68ed67a 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem.c
> >> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >> @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int
> >> shmem_page_offset, int page_length,
> >>       return ret ? - EFAULT : 0;
> >>   }
> >>
> >> +static inline uint64_t
> >> +slow_user_access(struct io_mapping *mapping,
> >> +         uint64_t page_base, int page_offset,
> >> +         char __user *user_data,
> >> +         int length, bool pwrite)
> >> +{
> >> +    void __iomem *vaddr_inatomic;
> >> +    void *vaddr;
> >> +    uint64_t unwritten;
> >> +
> >> +    vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> >> +    /* We can use the cpu mem copy function because this is X86. */
> >> +    vaddr = (void __force *)vaddr_inatomic + page_offset;
> >> +    if (pwrite)
> >> +        unwritten = __copy_from_user(vaddr, user_data, length);
> >> +    else
> >> +        unwritten = __copy_to_user(user_data, vaddr, length);
> >> +
> >> +    io_mapping_unmap(vaddr_inatomic);
> >> +    return unwritten;
> >> +}
> >> +
> >> +static int
> >> +i915_gem_gtt_copy(struct drm_device *dev,
> >> +           struct drm_i915_gem_object *obj, uint64_t size,
> >> +           uint64_t data_offset, uint64_t data_ptr)
> >> +{
> >> +    struct drm_i915_private *dev_priv = dev->dev_private;
> >> +    char __user *user_data;
> >> +    uint64_t remain;
> >> +    uint64_t offset, page_base;
> >> +    int page_offset, page_length, ret = 0;
> >> +
> >> +    ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> >> +    if (ret)
> >> +        goto out;
> >> +
> >> +    ret = i915_gem_object_set_to_gtt_domain(obj, false);
> >> +    if (ret)
> >> +        goto out_unpin;
> >> +
> >> +    ret = i915_gem_object_put_fence(obj);
> >> +    if (ret)
> >> +        goto out_unpin;
> >> +
> >> +    user_data = to_user_ptr(data_ptr);
> >> +    remain = size;
> >> +    offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> >> +
> >> +    mutex_unlock(&dev->struct_mutex);
> >> +    if (likely(!i915.prefault_disable))
> >> +        ret = fault_in_multipages_writeable(user_data, remain);
> >> +
> >> +    /*
> >> +     * page_offset = offset within page
> >> +     * page_base = page offset within aperture
> >> +     */
> >> +    page_offset = offset_in_page(offset);
> >> +    page_base = offset & PAGE_MASK;
> >> +
> >> +    while (remain > 0) {
> >> +        /* page_length = bytes to copy for this page */
> >> +        page_length = remain;
> >> +        if ((page_offset + remain) > PAGE_SIZE)
> >> +            page_length = PAGE_SIZE - page_offset;
> >> +
> >> +        /* This is a slow read/write as it tries to read from
> >> +         * and write to user memory which may result into page
> >> +         * faults
> >> +         */
> >> +        ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> >> +                       page_offset, user_data,
> >> +                       page_length, false);
> >> +
> >> +        if (ret) {
> >> +            ret = -EFAULT;
> >> +            break;
> >> +        }
> >> +
> >> +        remain -= page_length;
> >> +        user_data += page_length;
> >> +        page_base += page_length;
> >> +        page_offset = 0;
> >> +    }
> >> +
> >> +    mutex_lock(&dev->struct_mutex);
> >> +
> >> +out_unpin:
> >> +    i915_gem_object_ggtt_unpin(obj);
> >> +out:
> >> +    return ret;
> >> +}
> >> +
> >>   static int
> >>   i915_gem_shmem_pread(struct drm_device *dev,
> >>                struct drm_i915_gem_object *obj,
> >> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
> >> void *data,
> >>           goto out;
> >>       }
> >>
> >> -    /* prime objects have no backing filp to GEM pread/pwrite
> >> -     * pages from.
> >> -     */
> >> -    if (!obj->base.filp) {
> >> -        ret = -EINVAL;
> >> -        goto out;
> >> -    }
> >> -
> >>       trace_i915_gem_object_pread(obj, args->offset, args->size);
> >>
> >> -    ret = i915_gem_shmem_pread(dev, obj, args, file);
> >> +    /* pread for non shmem backed objects */
> >> +    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> >> +        ret = i915_gem_gtt_copy(dev, obj, args->size,
> >> +                    args->offset, args->data_ptr);
> >> +    else
> >> +        ret = i915_gem_shmem_pread(dev, obj, args, file);
> >
> > Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
> > objects if tiling is set. Sounds wrong to me unless I am missing something?
> 
> Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type 
> objects? What about (phys, stolen, userptr, dmabuf, ...?) Which of these 
> is the alternate path going to work with?
Only shmem backed objects have obj->base.filp set, filp pointing to the
shmem file. For all other non-shmem backed objects (stolen, userptr,
dmabuf) we use the alternate path.
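
For context, the split comes from the core GEM helpers (abridged from
drivers/gpu/drm/drm_gem.c):

	int drm_gem_object_init(struct drm_device *dev,
				struct drm_gem_object *obj, size_t size)
	{
		struct file *filp;

		drm_gem_private_object_init(dev, obj, size);

		/* shmem-backed objects get their backing file here */
		filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
		if (IS_ERR(filp))
			return PTR_ERR(filp);

		obj->filp = filp;
		return 0;
	}

	/* drm_gem_private_object_init() leaves obj->filp NULL, which is
	 * what stolen, userptr and dmabuf-imported objects end up with. */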

-Ankit
Dave Gordon Dec. 10, 2015, 6:18 p.m. UTC | #6
On 10/12/15 11:12, Ankitprasad Sharma wrote:
> On Wed, 2015-12-09 at 19:39 +0000, Dave Gordon wrote:
>> On 09/12/15 16:15, Tvrtko Ursulin wrote:
>>>
>>> Hi,
>>>
>>> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
>>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>>
>>>> This patch adds support for extending the pread/pwrite functionality
>>>> for objects not backed by shmem. The access will be made through
>>>> gtt interface. This will cover objects backed by stolen memory as well
>>>> as other non-shmem backed objects.
>>>>
>>>> v2: Drop locks around slow_user_access, prefault the pages before
>>>> access (Chris)
>>>>
>>>> v3: Rebased to the latest drm-intel-nightly (Ankit)
>>>>
>>>> v4: Moved page base & offset calculations outside the copy loop,
>>>> corrected data types for size and offset variables, corrected if-else
>>>> braces format (Tvrtko/kerneldocs)
>>>>
>>>> v5: Enabled pread/pwrite for all non-shmem backed objects including
>>>> without tiling restrictions (Ankit)
>>>>
>>>> v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
>>>>
>>>> v7: Updated commit message, Renamed i915_gem_gtt_read to
>>>> i915_gem_gtt_copy,
>>>> added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
>>>>
>>>> v8: Updated v7 commit message, mutex unlock around pwrite slow path for
>>>> non-shmem backed objects (Tvrtko)
>>>>
>>>> Testcase: igt/gem_stolen
>>>>
>>>> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_gem.c | 151
>>>> +++++++++++++++++++++++++++++++++-------
>>>>    1 file changed, 127 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>> index ed97de6..68ed67a 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int
>>>> shmem_page_offset, int page_length,
>>>>        return ret ? - EFAULT : 0;
>>>>    }
>>>>
>>>> +static inline uint64_t
>>>> +slow_user_access(struct io_mapping *mapping,
>>>> +         uint64_t page_base, int page_offset,
>>>> +         char __user *user_data,
>>>> +         int length, bool pwrite)
>>>> +{
>>>> +    void __iomem *vaddr_inatomic;
>>>> +    void *vaddr;
>>>> +    uint64_t unwritten;
>>>> +
>>>> +    vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
>>>> +    /* We can use the cpu mem copy function because this is X86. */
>>>> +    vaddr = (void __force *)vaddr_inatomic + page_offset;
>>>> +    if (pwrite)
>>>> +        unwritten = __copy_from_user(vaddr, user_data, length);
>>>> +    else
>>>> +        unwritten = __copy_to_user(user_data, vaddr, length);
>>>> +
>>>> +    io_mapping_unmap(vaddr_inatomic);
>>>> +    return unwritten;
>>>> +}
>>>> +
>>>> +static int
>>>> +i915_gem_gtt_copy(struct drm_device *dev,
>>>> +           struct drm_i915_gem_object *obj, uint64_t size,
>>>> +           uint64_t data_offset, uint64_t data_ptr)
>>>> +{
>>>> +    struct drm_i915_private *dev_priv = dev->dev_private;
>>>> +    char __user *user_data;
>>>> +    uint64_t remain;
>>>> +    uint64_t offset, page_base;
>>>> +    int page_offset, page_length, ret = 0;
>>>> +
>>>> +    ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +
>>>> +    ret = i915_gem_object_set_to_gtt_domain(obj, false);
>>>> +    if (ret)
>>>> +        goto out_unpin;
>>>> +
>>>> +    ret = i915_gem_object_put_fence(obj);
>>>> +    if (ret)
>>>> +        goto out_unpin;
>>>> +
>>>> +    user_data = to_user_ptr(data_ptr);
>>>> +    remain = size;
>>>> +    offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
>>>> +
>>>> +    mutex_unlock(&dev->struct_mutex);
>>>> +    if (likely(!i915.prefault_disable))
>>>> +        ret = fault_in_multipages_writeable(user_data, remain);
>>>> +
>>>> +    /*
>>>> +     * page_offset = offset within page
>>>> +     * page_base = page offset within aperture
>>>> +     */
>>>> +    page_offset = offset_in_page(offset);
>>>> +    page_base = offset & PAGE_MASK;
>>>> +
>>>> +    while (remain > 0) {
>>>> +        /* page_length = bytes to copy for this page */
>>>> +        page_length = remain;
>>>> +        if ((page_offset + remain) > PAGE_SIZE)
>>>> +            page_length = PAGE_SIZE - page_offset;
>>>> +
>>>> +        /* This is a slow read/write as it tries to read from
>>>> +         * and write to user memory which may result into page
>>>> +         * faults
>>>> +         */
>>>> +        ret = slow_user_access(dev_priv->gtt.mappable, page_base,
>>>> +                       page_offset, user_data,
>>>> +                       page_length, false);
>>>> +
>>>> +        if (ret) {
>>>> +            ret = -EFAULT;
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        remain -= page_length;
>>>> +        user_data += page_length;
>>>> +        page_base += page_length;
>>>> +        page_offset = 0;
>>>> +    }
>>>> +
>>>> +    mutex_lock(&dev->struct_mutex);
>>>> +
>>>> +out_unpin:
>>>> +    i915_gem_object_ggtt_unpin(obj);
>>>> +out:
>>>> +    return ret;
>>>> +}
>>>> +
>>>>    static int
>>>>    i915_gem_shmem_pread(struct drm_device *dev,
>>>>                 struct drm_i915_gem_object *obj,
>>>> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>            goto out;
>>>>        }
>>>>
>>>> -    /* prime objects have no backing filp to GEM pread/pwrite
>>>> -     * pages from.
>>>> -     */
>>>> -    if (!obj->base.filp) {
>>>> -        ret = -EINVAL;
>>>> -        goto out;
>>>> -    }
>>>> -
>>>>        trace_i915_gem_object_pread(obj, args->offset, args->size);
>>>>
>>>> -    ret = i915_gem_shmem_pread(dev, obj, args, file);
>>>> +    /* pread for non shmem backed objects */
>>>> +    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
>>>> +        ret = i915_gem_gtt_copy(dev, obj, args->size,
>>>> +                    args->offset, args->data_ptr);
>>>> +    else
>>>> +        ret = i915_gem_shmem_pread(dev, obj, args, file);
>>>
>>> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
>>> objects if tiling is set. Sounds wrong to me unless I am missing something?
>>
>> Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type
>> objects? What about (phys, stolen, userptr, dmabuf, ...?) Which of these
>> is the alternate path going to work with?
> Only shmem backed objects have obj->base.filp set, filp pointing to the
> shmem file. For all other non-shmem backed objects (stolen, userptr,
> dmabuf) we use the alternate path.
>
> -Ankit

But 'phys' objects DO have 'filp' set. Which path is expected to work 
for them?

.Dave.
ankitprasad.r.sharma@intel.com Dec. 11, 2015, 5:22 a.m. UTC | #7
On Thu, 2015-12-10 at 18:18 +0000, Dave Gordon wrote:
> On 10/12/15 11:12, Ankitprasad Sharma wrote:
> > On Wed, 2015-12-09 at 19:39 +0000, Dave Gordon wrote:
> >> On 09/12/15 16:15, Tvrtko Ursulin wrote:
> >>>
> >>> Hi,
> >>>
> >>> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
> >>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>>
> >>>> This patch adds support for extending the pread/pwrite functionality
> >>>> for objects not backed by shmem. The access will be made through
> >>>> gtt interface. This will cover objects backed by stolen memory as well
> >>>> as other non-shmem backed objects.
> >>>>
> >>>> v2: Drop locks around slow_user_access, prefault the pages before
> >>>> access (Chris)
> >>>>
> >>>> v3: Rebased to the latest drm-intel-nightly (Ankit)
> >>>>
> >>>> v4: Moved page base & offset calculations outside the copy loop,
> >>>> corrected data types for size and offset variables, corrected if-else
> >>>> braces format (Tvrtko/kerneldocs)
> >>>>
> >>>> v5: Enabled pread/pwrite for all non-shmem backed objects including
> >>>> without tiling restrictions (Ankit)
> >>>>
> >>>> v6: Using pwrite_fast for non-shmem backed objects as well (Chris)
> >>>>
> >>>> v7: Updated commit message, Renamed i915_gem_gtt_read to
> >>>> i915_gem_gtt_copy,
> >>>> added pwrite slow path for non-shmem backed objects (Chris/Tvrtko)
> >>>>
> >>>> v8: Updated v7 commit message, mutex unlock around pwrite slow path for
> >>>> non-shmem backed objects (Tvrtko)
> >>>>
> >>>> Testcase: igt/gem_stolen
> >>>>
> >>>> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/i915_gem.c | 151
> >>>> +++++++++++++++++++++++++++++++++-------
> >>>>    1 file changed, 127 insertions(+), 24 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> >>>> b/drivers/gpu/drm/i915/i915_gem.c
> >>>> index ed97de6..68ed67a 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_gem.c
> >>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>> @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int
> >>>> shmem_page_offset, int page_length,
> >>>>        return ret ? - EFAULT : 0;
> >>>>    }
> >>>>
> >>>> +static inline uint64_t
> >>>> +slow_user_access(struct io_mapping *mapping,
> >>>> +         uint64_t page_base, int page_offset,
> >>>> +         char __user *user_data,
> >>>> +         int length, bool pwrite)
> >>>> +{
> >>>> +    void __iomem *vaddr_inatomic;
> >>>> +    void *vaddr;
> >>>> +    uint64_t unwritten;
> >>>> +
> >>>> +    vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> >>>> +    /* We can use the cpu mem copy function because this is X86. */
> >>>> +    vaddr = (void __force *)vaddr_inatomic + page_offset;
> >>>> +    if (pwrite)
> >>>> +        unwritten = __copy_from_user(vaddr, user_data, length);
> >>>> +    else
> >>>> +        unwritten = __copy_to_user(user_data, vaddr, length);
> >>>> +
> >>>> +    io_mapping_unmap(vaddr_inatomic);
> >>>> +    return unwritten;
> >>>> +}
> >>>> +
> >>>> +static int
> >>>> +i915_gem_gtt_copy(struct drm_device *dev,
> >>>> +           struct drm_i915_gem_object *obj, uint64_t size,
> >>>> +           uint64_t data_offset, uint64_t data_ptr)
> >>>> +{
> >>>> +    struct drm_i915_private *dev_priv = dev->dev_private;
> >>>> +    char __user *user_data;
> >>>> +    uint64_t remain;
> >>>> +    uint64_t offset, page_base;
> >>>> +    int page_offset, page_length, ret = 0;
> >>>> +
> >>>> +    ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> >>>> +    if (ret)
> >>>> +        goto out;
> >>>> +
> >>>> +    ret = i915_gem_object_set_to_gtt_domain(obj, false);
> >>>> +    if (ret)
> >>>> +        goto out_unpin;
> >>>> +
> >>>> +    ret = i915_gem_object_put_fence(obj);
> >>>> +    if (ret)
> >>>> +        goto out_unpin;
> >>>> +
> >>>> +    user_data = to_user_ptr(data_ptr);
> >>>> +    remain = size;
> >>>> +    offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> >>>> +
> >>>> +    mutex_unlock(&dev->struct_mutex);
> >>>> +    if (likely(!i915.prefault_disable))
> >>>> +        ret = fault_in_multipages_writeable(user_data, remain);
> >>>> +
> >>>> +    /*
> >>>> +     * page_offset = offset within page
> >>>> +     * page_base = page offset within aperture
> >>>> +     */
> >>>> +    page_offset = offset_in_page(offset);
> >>>> +    page_base = offset & PAGE_MASK;
> >>>> +
> >>>> +    while (remain > 0) {
> >>>> +        /* page_length = bytes to copy for this page */
> >>>> +        page_length = remain;
> >>>> +        if ((page_offset + remain) > PAGE_SIZE)
> >>>> +            page_length = PAGE_SIZE - page_offset;
> >>>> +
> >>>> +        /* This is a slow read/write as it tries to read from
> >>>> +         * and write to user memory which may result into page
> >>>> +         * faults
> >>>> +         */
> >>>> +        ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> >>>> +                       page_offset, user_data,
> >>>> +                       page_length, false);
> >>>> +
> >>>> +        if (ret) {
> >>>> +            ret = -EFAULT;
> >>>> +            break;
> >>>> +        }
> >>>> +
> >>>> +        remain -= page_length;
> >>>> +        user_data += page_length;
> >>>> +        page_base += page_length;
> >>>> +        page_offset = 0;
> >>>> +    }
> >>>> +
> >>>> +    mutex_lock(&dev->struct_mutex);
> >>>> +
> >>>> +out_unpin:
> >>>> +    i915_gem_object_ggtt_unpin(obj);
> >>>> +out:
> >>>> +    return ret;
> >>>> +}
> >>>> +
> >>>>    static int
> >>>>    i915_gem_shmem_pread(struct drm_device *dev,
> >>>>                 struct drm_i915_gem_object *obj,
> >>>> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
> >>>> void *data,
> >>>>            goto out;
> >>>>        }
> >>>>
> >>>> -    /* prime objects have no backing filp to GEM pread/pwrite
> >>>> -     * pages from.
> >>>> -     */
> >>>> -    if (!obj->base.filp) {
> >>>> -        ret = -EINVAL;
> >>>> -        goto out;
> >>>> -    }
> >>>> -
> >>>>        trace_i915_gem_object_pread(obj, args->offset, args->size);
> >>>>
> >>>> -    ret = i915_gem_shmem_pread(dev, obj, args, file);
> >>>> +    /* pread for non shmem backed objects */
> >>>> +    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> >>>> +        ret = i915_gem_gtt_copy(dev, obj, args->size,
> >>>> +                    args->offset, args->data_ptr);
> >>>> +    else
> >>>> +        ret = i915_gem_shmem_pread(dev, obj, args, file);
> >>>
> >>> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
> >>> objects if tiling is set. Sounds wrong to me unless I am missing something?
> >>
> >> Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type
> >> objects? What about (phys, stolen, userptr, dmabuf, ...?) Which of these
> >> is the alternate path going to work with?
> > Only shmem backed objects have obj->base.filp set, filp pointing to the
> > shmem file. For all other non-shmem backed objects (stolen, userptr,
> > dmabuf) we use the alternate path.
> >
> > -Ankit
> 
> But 'phys' objects DO have 'filp' set. Which path is expected to work 
> for them?
Sorry. Yes, phys objects also have filp set. So they won't follow the
alternate path.

> .Dave.
Thanks,
Ankit
Daniel Vetter Dec. 11, 2015, 6:15 p.m. UTC | #8
On Wed, Dec 09, 2015 at 07:39:56PM +0000, Dave Gordon wrote:
> On 09/12/15 16:15, Tvrtko Ursulin wrote:
> >
> >Hi,
> >
> >On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
[snip!]
> >>  static int
> >>  i915_gem_shmem_pread(struct drm_device *dev,
> >>               struct drm_i915_gem_object *obj,
> >>@@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
> >>void *data,
> >>          goto out;
> >>      }
> >>
> >>-    /* prime objects have no backing filp to GEM pread/pwrite
> >>-     * pages from.
> >>-     */
> >>-    if (!obj->base.filp) {
> >>-        ret = -EINVAL;
> >>-        goto out;
> >>-    }
> >>-
> >>      trace_i915_gem_object_pread(obj, args->offset, args->size);
> >>
> >>-    ret = i915_gem_shmem_pread(dev, obj, args, file);
> >>+    /* pread for non shmem backed objects */
> >>+    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
> >>+        ret = i915_gem_gtt_copy(dev, obj, args->size,
> >>+                    args->offset, args->data_ptr);
> >>+    else
> >>+        ret = i915_gem_shmem_pread(dev, obj, args, file);
> >
> >Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
> >objects if tiling is set. Sounds wrong to me unless I am missing something?
> 
> Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type

obj->base.filp is for shmem-backed stuff. The gtt is irrelevant for backing
storage; well, except that if you can't read the shmem stuff directly with
the cpu, the only way is to go through a gtt device mapping.
-Daniel
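
For context, the fast path that the pwrite hunk below falls back from,
fast_user_write(), is not part of this diff. It looks roughly like the
sketch below (paraphrased, treat as illustrative). The key contrast with
slow_user_access() is atomic vs non-atomic mapping: the atomic copy must
not fault, which is why a fault there now triggers the slow path with
struct_mutex dropped.

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	/* Atomic kmap of the aperture page: sleeping is not allowed,
	 * so the copy must use the *_inatomic variant, which reports
	 * the bytes left uncopied instead of taking a page fault.
	 */
	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	vaddr = (void __force *)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}
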
Dave Gordon Dec. 15, 2015, 4:22 p.m. UTC | #9
On 11/12/15 18:15, Daniel Vetter wrote:
> On Wed, Dec 09, 2015 at 07:39:56PM +0000, Dave Gordon wrote:
>> On 09/12/15 16:15, Tvrtko Ursulin wrote:
>>>
>>> Hi,
>>>
>>> On 09/12/15 12:46, ankitprasad.r.sharma@intel.com wrote:
[snip!]
>>>>   static int
>>>>   i915_gem_shmem_pread(struct drm_device *dev,
>>>>                struct drm_i915_gem_object *obj,
>>>> @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>           goto out;
>>>>       }
>>>>
>>>> -    /* prime objects have no backing filp to GEM pread/pwrite
>>>> -     * pages from.
>>>> -     */
>>>> -    if (!obj->base.filp) {
>>>> -        ret = -EINVAL;
>>>> -        goto out;
>>>> -    }
>>>> -
>>>>       trace_i915_gem_object_pread(obj, args->offset, args->size);
>>>>
>>>> -    ret = i915_gem_shmem_pread(dev, obj, args, file);
>>>> +    /* pread for non shmem backed objects */
>>>> +    if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
>>>> +        ret = i915_gem_gtt_copy(dev, obj, args->size,
>>>> +                    args->offset, args->data_ptr);
>>>> +    else
>>>> +        ret = i915_gem_shmem_pread(dev, obj, args, file);
>>>
>>> Hm, it will end up calling i915_gem_shmem_pread for non-shmem backed
>>> objects if tiling is set. Sounds wrong to me unless I am missing something?
>>
>> Which GEM objects have obj->base.filp set? Is it ONLY regular gtt-type
>
> obj->base.filp is for shmem-backed stuff. The gtt is irrelevant for backing
> storage; well, except that if you can't read the shmem stuff directly with
> the cpu, the only way is to go through a gtt device mapping.
> -Daniel

So obj->base.filp is set for both phys and shmem (default) object types;
I called the latter a "gtt" type just because the get/put_pages()
functions have "gtt" in their names. But I note that the naming of GEM
object vfuncs (and vfunc tables) isn't consistent :( Maybe they should
be named "i915_gem_object_{get,put}_pages_shmem()", and the table would
then be i915_gem_object_shmem_ops :)

.Dave.
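
Concretely, the rename suggested above would look something like this;
purely hypothetical, since the *_shmem identifiers below do not exist in
the tree:

/* Hypothetical rename per the suggestion above; behaviour unchanged,
 * only the names say "shmem" rather than "gtt".
 */
static const struct drm_i915_gem_object_ops i915_gem_object_shmem_ops = {
	.get_pages = i915_gem_object_get_pages_shmem,	/* was ..._gtt */
	.put_pages = i915_gem_object_put_pages_shmem,	/* was ..._gtt */
};
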
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed97de6..68ed67a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -614,6 +614,99 @@  shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 	return ret ? - EFAULT : 0;
 }
 
+static inline uint64_t
+slow_user_access(struct io_mapping *mapping,
+		 uint64_t page_base, int page_offset,
+		 char __user *user_data,
+		 int length, bool pwrite)
+{
+	void __iomem *vaddr_inatomic;
+	void *vaddr;
+	uint64_t unwritten;
+
+	vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+	/* We can use the cpu mem copy function because this is X86. */
+	vaddr = (void __force *)vaddr_inatomic + page_offset;
+	if (pwrite)
+		unwritten = __copy_from_user(vaddr, user_data, length);
+	else
+		unwritten = __copy_to_user(user_data, vaddr, length);
+
+	io_mapping_unmap(vaddr_inatomic);
+	return unwritten;
+}
+
+static int
+i915_gem_gtt_copy(struct drm_device *dev,
+		   struct drm_i915_gem_object *obj, uint64_t size,
+		   uint64_t data_offset, uint64_t data_ptr)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	char __user *user_data;
+	uint64_t remain;
+	uint64_t offset, page_base;
+	int page_offset, page_length, ret = 0;
+
+	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (ret)
+		goto out_unpin;
+
+	ret = i915_gem_object_put_fence(obj);
+	if (ret)
+		goto out_unpin;
+
+	user_data = to_user_ptr(data_ptr);
+	remain = size;
+	offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+	mutex_unlock(&dev->struct_mutex);
+	if (likely(!i915.prefault_disable))
+		ret = fault_in_multipages_writeable(user_data, remain);
+
+	/*
+	 * page_offset = offset within page
+	 * page_base = page offset within aperture
+	 */
+	page_offset = offset_in_page(offset);
+	page_base = offset & PAGE_MASK;
+
+	while (remain > 0) {
+		/* page_length = bytes to copy for this page */
+		page_length = remain;
+		if ((page_offset + remain) > PAGE_SIZE)
+			page_length = PAGE_SIZE - page_offset;
+
+		/* This is a slow read/write as it tries to read from
+		 * and write to user memory, which may result in page
+		 * faults.
+		 */
+		ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+				       page_offset, user_data,
+				       page_length, false);
+
+		if (ret) {
+			ret = -EFAULT;
+			break;
+		}
+
+		remain -= page_length;
+		user_data += page_length;
+		page_base += page_length;
+		page_offset = 0;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+	i915_gem_object_ggtt_unpin(obj);
+out:
+	return ret;
+}
+
 static int
 i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_object *obj,
@@ -737,17 +830,14 @@  i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-	ret = i915_gem_shmem_pread(dev, obj, args, file);
+	/* pread for non shmem backed objects */
+	if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE)
+		ret = i915_gem_gtt_copy(dev, obj, args->size,
+					args->offset, args->data_ptr);
+	else
+		ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -789,10 +879,12 @@  i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 			 struct drm_i915_gem_pwrite *args,
 			 struct drm_file *file)
 {
+	struct drm_device *dev = obj->base.dev;
 	struct drm_mm_node node;
 	uint64_t remain, offset;
 	char __user *user_data;
 	int ret;
+	bool faulted = false;
 
 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
 	if (ret) {
@@ -851,11 +943,29 @@  i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		/* If we get a fault while copying data, then (presumably) our
 		 * source page isn't available.  Return the error and we'll
 		 * retry in the slow path.
+		 * If the object is non-shmem backed, we retry with the
+		 * path that handles page faults.
 		 */
-		if (fast_user_write(i915->gtt.mappable, page_base,
-				    page_offset, user_data, page_length)) {
-			ret = -EFAULT;
-			goto out_flush;
+		if (faulted || fast_user_write(i915->gtt.mappable,
+						page_base, page_offset,
+						user_data, page_length)) {
+			if (!obj->base.filp) {
+				faulted = true;
+				mutex_unlock(&dev->struct_mutex);
+				if (slow_user_access(i915->gtt.mappable,
+						     page_base,
+						     page_offset, user_data,
+						     page_length, true)) {
+					ret = -EFAULT;
+					mutex_lock(&dev->struct_mutex);
+					goto out_flush;
+				}
+
+				mutex_lock(&dev->struct_mutex);
+			} else {
+				ret = -EFAULT;
+				goto out_flush;
+			}
 		}
 
 		remain -= page_length;
@@ -1121,14 +1231,6 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
 	ret = -EFAULT;
@@ -1139,8 +1241,9 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * perspective, requiring manual detiling by the client.
 	 */
 	if (obj->tiling_mode == I915_TILING_NONE &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
-	    cpu_write_needs_clflush(obj)) {
+	    (!obj->base.filp ||
+	    (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+	    cpu_write_needs_clflush(obj)))) {
 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
@@ -1150,7 +1253,7 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
-		else
+		else if (obj->base.filp)
 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 	}