diff mbox

[22/29] drm/i915: Handle stolen objects in pwrite

Message ID 1344696088-24760-23-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Aug. 11, 2012, 2:41 p.m. UTC
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  169 +++++++++++++++++++++++++--------------
 1 file changed, 111 insertions(+), 58 deletions(-)

Comments

Daniel Vetter Aug. 20, 2012, 7:56 p.m. UTC | #1
On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
interface already has)? Since the pwrite/pread code is already rather
branch heavy I hope we don't see the overhead of the indirect call even
in microbenchmarks (haven't checked). And this way we would also neatly
wrap up dma_bufs for pwrite (if anyone ever really wants that ...).

The kmap(_atomic) for stolen mem backed objects would boil down to doing
the pointer arithmetic, kunmap would be just a noop.

Cheers, Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem.c |  169 +++++++++++++++++++++++++--------------
>  1 file changed, 111 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
>   * needs_clflush_before is set and flushes out any written cachelines after
>   * writing if needs_clflush is set. */
>  static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
>  		  char __user *user_data,
>  		  bool page_do_bit17_swizzling,
>  		  bool needs_clflush_before,
>  		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>  
>  	if (unlikely(page_do_bit17_swizzling))
>  		return -EINVAL;
>  
> -	vaddr = kmap_atomic(page);
>  	if (needs_clflush_before)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
>  				       page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  	if (needs_clflush_after)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
>  				       page_length);
> -	kunmap_atomic(vaddr);
>  
>  	return ret ? -EFAULT : 0;
>  }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  /* Only difference to the fast-path function is that this can handle bit17
>   * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
>  		  char __user *user_data,
>  		  bool page_do_bit17_swizzling,
>  		  bool needs_clflush_before,
>  		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>  
> -	vaddr = kmap(page);
>  	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>  					     page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>  					     page_length,
>  					     page_do_bit17_swizzling);
> -	kunmap(page);
>  
>  	return ret ? -EFAULT : 0;
>  }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		      struct drm_i915_gem_pwrite *args,
>  		      struct drm_file *file)
>  {
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	ssize_t remain;
>  	loff_t offset;
>  	char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	if (ret)
>  		return ret;
>  
> -	i915_gem_object_pin_pages(obj);
> -
>  	offset = args->offset;
>  	obj->dirty = 1;
>  
> -	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> -		struct page *page;
> -		int partial_cacheline_write;
> +	if (obj->stolen) {
> +		while (remain > 0) {
> +			char *vaddr;
> +			int partial_cacheline_write;
>  
> -		if (i < offset >> PAGE_SHIFT)
> -			continue;
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
>  
> -		if (remain <= 0)
> -			break;
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
>  
> -		/* Operation in this page
> -		 *
> -		 * shmem_page_offset = offset within page in shmem file
> -		 * page_length = bytes to copy for this page
> -		 */
> -		shmem_page_offset = offset_in_page(offset);
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
>  
> -		page_length = remain;
> -		if ((shmem_page_offset + page_length) > PAGE_SIZE)
> -			page_length = PAGE_SIZE - shmem_page_offset;
> +			vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				((uintptr_t)vaddr & (1 << 17)) != 0;
>  
> -		/* If we don't overwrite a cacheline completely we need to be
> -		 * careful to have up-to-date data by first clflushing. Don't
> -		 * overcomplicate things and flush the entire patch. */
> -		partial_cacheline_write = needs_clflush_before &&
> -			((shmem_page_offset | page_length)
> -				& (boot_cpu_data.x86_clflush_size - 1));
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>  
> -		page = sg_page(sg);
> -		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> -			(page_to_phys(page) & (1 << 17)) != 0;
> +			if (ret == 0)
> +				goto next_stolen;
>  
> -		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> -		if (ret == 0)
> -			goto next_page;
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
>  
> -		hit_slowpath = 1;
> -		mutex_unlock(&dev->struct_mutex);
> -		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>  
> -		mutex_lock(&dev->struct_mutex);
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out;
>  
> -next_page:
> -		set_page_dirty(page);
> -		mark_page_accessed(page);
> +next_stolen:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +	} else {
> +		i915_gem_object_pin_pages(obj);
>  
> -		if (ret)
> -			goto out;
> +		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +			struct page *page;
> +			char *vaddr;
> +			int partial_cacheline_write;
>  
> -		remain -= page_length;
> -		user_data += page_length;
> -		offset += page_length;
> +			if (i < offset >> PAGE_SHIFT)
> +				continue;
> +
> +			if (remain <= 0)
> +				break;
> +
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
> +
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
> +
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
> +
> +			page = sg_page(sg);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				(page_to_phys(page) & (1 << 17)) != 0;
> +
> +			vaddr = kmap_atomic(page);
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +
> +			kunmap_atomic(vaddr);
> +
> +			if (ret == 0)
> +				goto next_page;
> +
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
> +
> +			vaddr = kmap(page);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +			kunmap(page);
> +
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out_unpin;
> +
> +next_page:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +out_unpin:
> +		i915_gem_object_unpin_pages(obj);
>  	}
>  
>  out:
> -	i915_gem_object_unpin_pages(obj);
> -
>  	if (hit_slowpath) {
>  		/* Fixup: Kill any reinstated backing storage pages */
>  		if (obj->madv == __I915_MADV_PURGED)
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Aug. 22, 2012, 3:47 p.m. UTC | #2
On Mon, 20 Aug 2012 21:56:08 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
> interface already has)? Since the pwrite/pread code is already rather
> branch heavy I hope we don't see the overhead of the indirect call even
> in microbenchmarks (haven't checked). And this way we would also neatly
> wrap up dma_bufs for pwrite (if anyone ever really wants that ...).
> 
> The kmap(_atomic) for stolen mem backed objects would boil down to doing
> the pointer arithmetic, kunmap would be just a noop.

Sounds nice, I'll cook something up and allocate yet another pointer in
drm_i915_gem_object for the typed ops. I wonder if we can unify the phys
and the dma_buf...
-Chris
Chris Wilson Aug. 30, 2012, 3:09 p.m. UTC | #3
On Mon, 20 Aug 2012 21:56:08 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
> interface already has)? Since the pwrite/pread code is already rather
> branch heavy I hope we don't see the overhead of the indirect call even
> in microbenchmarks (haven't checked). And this way we would also neatly
> wrap up dma_bufs for pwrite (if anyone ever really wants that ...).
> 
> The kmap(_atomic) for stolen mem backed objects would boil down to doing
> the pointer arithmetic, kunmap would be just a noop.

Tried doing so. The lack of a struct page for stolen memory makes it more
cumbersome than it is worth, and, worse, confusing.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 552f95b..a2fb2aa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -664,19 +664,17 @@  out:
  * needs_clflush_before is set and flushes out any written cachelines after
  * writing if needs_clflush is set. */
 static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
 	if (unlikely(page_do_bit17_swizzling))
 		return -EINVAL;
 
-	vaddr = kmap_atomic(page);
 	if (needs_clflush_before)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
@@ -686,7 +684,6 @@  shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 	if (needs_clflush_after)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
-	kunmap_atomic(vaddr);
 
 	return ret ? -EFAULT : 0;
 }
@@ -694,16 +691,14 @@  shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
@@ -720,7 +715,6 @@  shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
 					     page_do_bit17_swizzling);
-	kunmap(page);
 
 	return ret ? -EFAULT : 0;
 }
@@ -731,6 +725,7 @@  i915_gem_shmem_pwrite(struct drm_device *dev,
 		      struct drm_i915_gem_pwrite *args,
 		      struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
@@ -770,74 +765,132 @@  i915_gem_shmem_pwrite(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
-		int partial_cacheline_write;
+	if (obj->stolen) {
+		while (remain > 0) {
+			char *vaddr;
+			int partial_cacheline_write;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			shmem_page_offset = offset_in_page(offset);
 
-		if (remain <= 0)
-			break;
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire patch. */
+			partial_cacheline_write = needs_clflush_before &&
+				((shmem_page_offset | page_length)
+				 & (boot_cpu_data.x86_clflush_size - 1));
 
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
+			vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				((uintptr_t)vaddr & (1 << 17)) != 0;
 
-		/* If we don't overwrite a cacheline completely we need to be
-		 * careful to have up-to-date data by first clflushing. Don't
-		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
-			((shmem_page_offset | page_length)
-				& (boot_cpu_data.x86_clflush_size - 1));
+			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-		page = sg_page(sg);
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
+			if (ret == 0)
+				goto next_stolen;
 
-		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
-		if (ret == 0)
-			goto next_page;
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
 
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
-		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
+			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-		mutex_lock(&dev->struct_mutex);
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out;
 
-next_page:
-		set_page_dirty(page);
-		mark_page_accessed(page);
+next_stolen:
+			remain -= page_length;
+			user_data += page_length;
+			offset += page_length;
+		}
+	} else {
+		i915_gem_object_pin_pages(obj);
 
-		if (ret)
-			goto out;
+		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+			struct page *page;
+			char *vaddr;
+			int partial_cacheline_write;
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+			if (i < offset >> PAGE_SHIFT)
+				continue;
+
+			if (remain <= 0)
+				break;
+
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			shmem_page_offset = offset_in_page(offset);
+
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
+
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire patch. */
+			partial_cacheline_write = needs_clflush_before &&
+				((shmem_page_offset | page_length)
+				 & (boot_cpu_data.x86_clflush_size - 1));
+
+			page = sg_page(sg);
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				(page_to_phys(page) & (1 << 17)) != 0;
+
+			vaddr = kmap_atomic(page);
+			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+
+			kunmap_atomic(vaddr);
+
+			if (ret == 0)
+				goto next_page;
+
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
+
+			vaddr = kmap(page);
+			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+			kunmap(page);
+
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out_unpin;
+
+next_page:
+			remain -= page_length;
+			user_data += page_length;
+			offset += page_length;
+		}
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
-
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)