
[6/6] drm/i915: obey wbinvd threshold in more places

Message ID 1423518859-6199-7-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived

Commit Message

Ben Widawsky Feb. 9, 2015, 9:54 p.m. UTC
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  4 ++++
 drivers/gpu/drm/i915/i915_gem.c     | 32 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++---
 3 files changed, 42 insertions(+), 7 deletions(-)

Comments

Chris Wilson Feb. 10, 2015, 9:28 a.m. UTC | #1
On Mon, Feb 09, 2015 at 01:54:19PM -0800, Ben Widawsky wrote:
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  4 ++++
>  drivers/gpu/drm/i915/i915_gem.c     | 32 ++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++---
>  3 files changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5d2f62d..dfecdfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2818,6 +2818,10 @@ static inline bool cpu_cache_is_coherent(struct drm_device *dev,
>  {
>  	return HAS_LLC(dev) || level != I915_CACHE_NONE;
>  }
> +static inline bool i915_gem_obj_should_clflush(struct drm_i915_gem_object *obj)
> +{
> +	return obj->base.size >= to_i915(obj->base.dev)->wbinvd_threshold;
> +}

if (i915_gem_obj_should_clflush(obj)) wbinvd()?

Does wbinvd always have the same characteristic threshold, even when coupled
with a second access (read or write) inside the TLB flushing of
kunmap_atomic? I would imagine that these workloads are dramatically
different to the replacement in execbuffer.
-Chris
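
For illustration only, a minimal sketch of the direction the comment above seems
to hint at: fold the threshold test and the flush itself into one helper, so the
predicate's name can never disagree with the action taken at the call site. The
helper name i915_gem_obj_flush_cpu_cache() and the drm_clflush_sg() fallback are
assumptions made for the sketch, not part of this series; the actual call sites
in the patch flush page-by-page or by virtual range instead.

static inline void
i915_gem_obj_flush_cpu_cache(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);

	if (obj->base.size >= dev_priv->wbinvd_threshold)
		wbinvd();			/* one global writeback + invalidate */
	else
		drm_clflush_sg(obj->pages);	/* flush only the object's backing pages */
}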
Shuang He Feb. 10, 2015, 8:49 p.m. UTC | #2
Tested-By: PRC QA PRTS (Patch Regression Test System, contact: shuang.he@intel.com)
Task id: 5739
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                                  282/283              282/283
ILK                                  271/278              271/278
SNB              +2-22              340/346              320/346
IVB              +1-2              378/384              377/384
BYT                                  296/296              296/296
HSW              +4                 421/428              425/428
BDW                                  318/333              318/333
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*ILK  igt_drv_suspend_debugfs-reader      DMESG_WARN(1, M37)      NO_RESULT(1, M37)
*ILK  igt_drv_suspend_fence-restore-tiled2untiled      DMESG_WARN(1, M37)      NO_RESULT(1, M37)
*ILK  igt_drv_suspend_fence-restore-untiled      DMESG_WARN(1, M37)      NO_RESULT(1, M37)
*ILK  igt_drv_suspend_forcewake      DMESG_WARN(1, M37)      NO_RESULT(1, M37)
*ILK  igt_gem_workarounds_suspend-resume      DMESG_WARN(1, M37)      INIT(1, M37)
 SNB  igt_kms_cursor_crc_cursor-size-change      NSPT(1, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_kms_flip_event_leak      NSPT(1, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_kms_flip_modeset-vs-vblank-race      DMESG_WARN(1, M22)PASS(1, M22)      PASS(1, M22)
 SNB  igt_kms_mmio_vs_cs_flip_setcrtc_vs_cs_flip      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_kms_mmio_vs_cs_flip_setplane_vs_cs_flip      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_kms_pipe_crc_basic_read-crc-pipe-A      DMESG_WARN(1, M22)PASS(6, M22)      PASS(1, M22)
 SNB  igt_kms_rotation_crc_primary-rotation      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_kms_rotation_crc_sprite-rotation      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_cursor      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_cursor-dpms      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_dpms-mode-unset-non-lpsp      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_dpms-non-lpsp      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_drm-resources-equal      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_fences      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_fences-dpms      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_gem-execbuf      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_gem-mmap-cpu      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_gem-mmap-gtt      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_gem-pread      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_i2c      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_modeset-non-lpsp      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_modeset-non-lpsp-stress-no-wait      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_pci-d3-state      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 SNB  igt_pm_rpm_rte      NSPT(2, M22)PASS(1, M22)      NSPT(1, M22)
 IVB  igt_gem_pwrite_pread_snooped-copy-performance      DMESG_WARN(1, M34)PASS(5, M34)      DMESG_WARN(1, M34)
 IVB  igt_gem_storedw_batches_loop_normal      DMESG_WARN(2, M34)PASS(2, M34)      PASS(1, M34)
 IVB  igt_gem_storedw_batches_loop_secure-dispatch      DMESG_WARN(1, M34)PASS(3, M34)      DMESG_WARN(1, M34)
 HSW  igt_gem_storedw_loop_blt      DMESG_WARN(3, M20)PASS(3, M20)      PASS(1, M20)
 HSW  igt_gem_storedw_loop_vebox      DMESG_WARN(3, M20)PASS(2, M20)      PASS(1, M20)
*HSW  igt_kms_flip_bo-too-big      BLACKLIST(1, M20)      PASS(1, M20)
*HSW  igt_kms_flip_bo-too-big-interruptible      BLACKLIST(1, M20)      PASS(1, M20)
Note: Pay particular attention to the lines starting with '*'.

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5d2f62d..dfecdfd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2818,6 +2818,10 @@  static inline bool cpu_cache_is_coherent(struct drm_device *dev,
 {
 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
+static inline bool i915_gem_obj_should_clflush(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size >= to_i915(obj->base.dev)->wbinvd_threshold;
+}
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d5a69d..59be709 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -204,6 +204,7 @@  i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	char *vaddr = obj->phys_handle->vaddr;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	const bool do_wbinvd = i915_gem_obj_should_clflush(obj);
 	int i;
 
 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
@@ -219,12 +220,15 @@  i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 		src = kmap_atomic(page);
 		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		if (!do_wbinvd)
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		kunmap_atomic(src);
 
 		page_cache_release(page);
 		vaddr += PAGE_SIZE;
 	}
+	if (do_wbinvd)
+		wbinvd();
 
 	i915_gem_chipset_flush(obj->base.dev);
 
@@ -252,6 +256,7 @@  i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 {
+	const bool do_wbinvd = i915_gem_obj_should_clflush(obj);
 	int ret;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -282,7 +287,8 @@  i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 				continue;
 
 			dst = kmap_atomic(page);
-			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			if (!do_wbinvd)
+				drm_clflush_virt_range(vaddr, PAGE_SIZE);
 			memcpy(dst, vaddr, PAGE_SIZE);
 			kunmap_atomic(dst);
 
@@ -295,6 +301,9 @@  i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 		obj->dirty = 0;
 	}
 
+	if (do_wbinvd && !ret)
+		wbinvd();
+
 	sg_free_table(obj->pages);
 	kfree(obj->pages);
 
@@ -396,7 +405,10 @@  i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
-	drm_clflush_virt_range(vaddr, args->size);
+	if (args->size >= to_i915(obj->base.dev)->wbinvd_threshold)
+		wbinvd();
+	else
+		drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -647,6 +659,7 @@  i915_gem_shmem_pread(struct drm_device *dev,
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int prefaulted = 0;
 	int needs_clflush = 0;
+	bool do_wbinvd = false;
 	struct sg_page_iter sg_iter;
 
 	user_data = to_user_ptr(args->data_ptr);
@@ -658,6 +671,9 @@  i915_gem_shmem_pread(struct drm_device *dev,
 	if (ret)
 		return ret;
 
+	if (needs_clflush && i915_gem_obj_should_clflush(obj))
+		do_wbinvd = true;
+
 	offset = args->offset;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
@@ -714,6 +730,9 @@  next_page:
 	}
 
 out:
+	if (do_wbinvd && !ret)
+		wbinvd();
+
 	i915_gem_object_unpin_pages(obj);
 
 	return ret;
@@ -4061,7 +4080,12 @@  i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		if (is_cpu_flush_required(obj) &&
+		    obj->base.size >= dev_priv->wbinvd_threshold)
+			wbinvd();
+		else
+			i915_gem_clflush_object(obj, false);
 
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 746f77f..13cc493 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -371,6 +371,9 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
 	struct sg_page_iter sg_iter;
+	const bool needs_flush = !HAS_LLC(ppgtt->base.dev);
+	bool do_wbinvd = needs_flush &&
+		pages->nents * PAGE_SIZE >= to_i915(vm->dev)->wbinvd_threshold;
 
 	pt_vaddr = NULL;
 
@@ -385,7 +388,7 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
 					cache_level, true);
 		if (++pte == GEN8_PTES_PER_PAGE) {
-			if (!HAS_LLC(ppgtt->base.dev))
+			if (needs_flush && !do_wbinvd)
 				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -401,6 +404,9 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 		kunmap_atomic(pt_vaddr);
 	}
+
+	if (do_wbinvd)
+		wbinvd();
 }
 
 static void gen8_free_page_tables(struct page **pt_pages)
@@ -660,11 +666,12 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 						      I915_CACHE_LLC);
 		}
-		if (!HAS_LLC(ppgtt->base.dev))
-			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
 		kunmap_atomic(pd_vaddr);
 	}
 
+	if (!HAS_LLC(ppgtt->base.dev))
+		wbinvd();
+
 	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;