Message ID | 1423518859-6199-7-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---
State | New, archived |
On Mon, Feb 09, 2015 at 01:54:19PM -0800, Ben Widawsky wrote:
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  4 ++++
>  drivers/gpu/drm/i915/i915_gem.c     | 32 ++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++---
>  3 files changed, 42 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5d2f62d..dfecdfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2818,6 +2818,10 @@ static inline bool cpu_cache_is_coherent(struct drm_device *dev,
>  {
>  	return HAS_LLC(dev) || level != I915_CACHE_NONE;
>  }
> +static inline bool i915_gem_obj_should_clflush(struct drm_i915_gem_object *obj)
> +{
> +	return obj->base.size >= to_i915(obj->base.dev)->wbinvd_threshold;
> +}

if (i915_gem_obj_should_clflush(obj)) wbinvd()?

Does wbinvd always have the same characteristic threshold, even coupled
with a second access (read or write) inside the TLB flushing of
kunmap_atomic? I would imagine that these workloads are dramatically
different to the replacement in execbuffer.
-Chris
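Chris's crossover question is measurable in principle. Below is a minimal
sketch of how the two strategies could be timed from kernel context,
assuming a debugfs-style hook in i915; clflush_ns() and wbinvd_ns() are
hypothetical helpers for illustration, not part of this patch:

/*
 * Hypothetical measurement helpers (not in the patch): time each flush
 * strategy so the crossover size can be compared across access patterns.
 */
#include <linux/ktime.h>
#include <drm/drmP.h>		/* drm_clflush_virt_range() */
#include <asm/special_insns.h>	/* wbinvd() */

static u64 clflush_ns(void *vaddr, unsigned long size)
{
	ktime_t t0 = ktime_get();

	drm_clflush_virt_range(vaddr, size);	/* cost scales with size */
	return ktime_to_ns(ktime_sub(ktime_get(), t0));
}

static u64 wbinvd_ns(void)
{
	ktime_t t0 = ktime_get();

	wbinvd();	/* fixed cost, but flushes every cache on the socket */
	return ktime_to_ns(ktime_sub(ktime_get(), t0));
}

Even with such numbers in hand, the crossover stays workload-dependent:
wbinvd evicts unrelated cachelines too, so its effective cost includes the
refills that other work pays afterwards, which a microbenchmark like this
does not capture.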
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5739
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV 282/283 282/283
ILK 271/278 271/278
SNB +2-22 340/346 320/346
IVB +1-2 378/384 377/384
BYT 296/296 296/296
HSW +4 421/428 425/428
BDW 318/333 318/333
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
*ILK igt_drv_suspend_debugfs-reader DMESG_WARN(1, M37) NO_RESULT(1, M37)
*ILK igt_drv_suspend_fence-restore-tiled2untiled DMESG_WARN(1, M37) NO_RESULT(1, M37)
*ILK igt_drv_suspend_fence-restore-untiled DMESG_WARN(1, M37) NO_RESULT(1, M37)
*ILK igt_drv_suspend_forcewake DMESG_WARN(1, M37) NO_RESULT(1, M37)
*ILK igt_gem_workarounds_suspend-resume DMESG_WARN(1, M37) INIT(1, M37)
SNB igt_kms_cursor_crc_cursor-size-change NSPT(1, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_kms_flip_event_leak NSPT(1, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_kms_flip_modeset-vs-vblank-race DMESG_WARN(1, M22)PASS(1, M22) PASS(1, M22)
SNB igt_kms_mmio_vs_cs_flip_setcrtc_vs_cs_flip NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_kms_mmio_vs_cs_flip_setplane_vs_cs_flip NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_kms_pipe_crc_basic_read-crc-pipe-A DMESG_WARN(1, M22)PASS(6, M22) PASS(1, M22)
SNB igt_kms_rotation_crc_primary-rotation NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_kms_rotation_crc_sprite-rotation NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_cursor NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_cursor-dpms NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_dpms-mode-unset-non-lpsp NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_dpms-non-lpsp NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_drm-resources-equal NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_fences NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_fences-dpms NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_gem-execbuf NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_gem-mmap-cpu NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_gem-mmap-gtt NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_gem-pread NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_i2c NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_modeset-non-lpsp NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_modeset-non-lpsp-stress-no-wait NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_pci-d3-state NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
SNB igt_pm_rpm_rte NSPT(2, M22)PASS(1, M22) NSPT(1, M22)
IVB igt_gem_pwrite_pread_snooped-copy-performance DMESG_WARN(1, M34)PASS(5, M34) DMESG_WARN(1, M34)
IVB igt_gem_storedw_batches_loop_normal DMESG_WARN(2, M34)PASS(2, M34) PASS(1, M34)
IVB igt_gem_storedw_batches_loop_secure-dispatch DMESG_WARN(1, M34)PASS(3, M34) DMESG_WARN(1, M34)
HSW igt_gem_storedw_loop_blt DMESG_WARN(3, M20)PASS(3, M20) PASS(1, M20)
HSW igt_gem_storedw_loop_vebox DMESG_WARN(3, M20)PASS(2, M20) PASS(1, M20)
*HSW igt_kms_flip_bo-too-big BLACKLIST(1, M20) PASS(1, M20)
*HSW igt_kms_flip_bo-too-big-interruptible BLACKLIST(1, M20) PASS(1, M20)
Note: Pay particular attention to lines starting with '*'.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5d2f62d..dfecdfd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2818,6 +2818,10 @@ static inline bool cpu_cache_is_coherent(struct drm_device *dev,
 {
 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
+static inline bool i915_gem_obj_should_clflush(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size >= to_i915(obj->base.dev)->wbinvd_threshold;
+}
 
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d5a69d..59be709 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -204,6 +204,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	char *vaddr = obj->phys_handle->vaddr;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	const bool do_wbinvd = i915_gem_obj_should_clflush(obj);
 	int i;
 
 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
@@ -219,12 +220,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 		src = kmap_atomic(page);
 		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		if (!do_wbinvd)
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		kunmap_atomic(src);
 
 		page_cache_release(page);
 		vaddr += PAGE_SIZE;
 	}
 
+	if (do_wbinvd)
+		wbinvd();
 	i915_gem_chipset_flush(obj->base.dev);
 
@@ -252,6 +256,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 {
+	const bool do_wbinvd = i915_gem_obj_should_clflush(obj);
 	int ret;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -282,7 +287,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 			continue;
 
 		dst = kmap_atomic(page);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		if (!do_wbinvd)
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		memcpy(dst, vaddr, PAGE_SIZE);
 		kunmap_atomic(dst);
 
@@ -295,6 +301,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 		obj->dirty = 0;
 	}
 
+	if (do_wbinvd && !ret)
+		wbinvd();
+
 	sg_free_table(obj->pages);
 	kfree(obj->pages);
 
@@ -396,7 +405,10 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 	}
 
-	drm_clflush_virt_range(vaddr, args->size);
+	if (args->size >= to_i915(obj->base.dev)->wbinvd_threshold)
+		wbinvd();
+	else
+		drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -647,6 +659,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int prefaulted = 0;
 	int needs_clflush = 0;
+	bool do_wbinvd = false;
 	struct sg_page_iter sg_iter;
 
 	user_data = to_user_ptr(args->data_ptr);
@@ -658,6 +671,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	if (ret)
 		return ret;
 
+	if (needs_clflush && i915_gem_obj_should_clflush(obj))
+		do_wbinvd = true;
+
 	offset = args->offset;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
@@ -714,6 +730,9 @@ next_page:
 	}
 
 out:
+	if (do_wbinvd && !ret)
+		wbinvd();
+
 	i915_gem_object_unpin_pages(obj);
 
 	return ret;
@@ -4061,7 +4080,12 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		if (is_cpu_flush_required(obj) &&
+		    obj->base.size >= dev_priv->wbinvd_threshold)
+			wbinvd();
+		else
+			i915_gem_clflush_object(obj, false);
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 746f77f..13cc493 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -371,6 +371,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
 	struct sg_page_iter sg_iter;
+	const bool needs_flush = !HAS_LLC(ppgtt->base.dev);
+	bool do_wbinvd = needs_flush &&
+		pages->nents * PAGE_SIZE >= to_i915(vm->dev)->wbinvd_threshold;
 
 	pt_vaddr = NULL;
 
@@ -385,7 +388,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
 					cache_level, true);
 		if (++pte == GEN8_PTES_PER_PAGE) {
-			if (!HAS_LLC(ppgtt->base.dev))
+			if (needs_flush && !do_wbinvd)
 				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -401,6 +404,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 		kunmap_atomic(pt_vaddr);
 	}
+
+	if (do_wbinvd)
+		wbinvd();
 }
 
 static void gen8_free_page_tables(struct page **pt_pages)
@@ -660,11 +666,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 						      I915_CACHE_LLC);
 		}
-		if (!HAS_LLC(ppgtt->base.dev))
-			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
 		kunmap_atomic(pd_vaddr);
 	}
 
+	if (!HAS_LLC(ppgtt->base.dev))
+		wbinvd();
+
 	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  4 ++++
 drivers/gpu/drm/i915/i915_gem.c     | 32 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++---
 3 files changed, 42 insertions(+), 7 deletions(-)
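The shape of the check in the patch (a single per-device size threshold)
follows from a linear cost model: drm_clflush_virt_range() walks the range
one 64-byte cacheline at a time, so its cost grows with object size, while
wbinvd has a roughly fixed cost. A sketch with assumed constants, purely
illustrative; the real wbinvd_threshold would come from measurement, not
from these numbers:

/*
 * Illustrative cost model only; both constants below are assumptions,
 * not measurements behind this patch's wbinvd_threshold.
 */
#include <linux/kernel.h>	/* DIV_ROUND_UP */
#include <linux/types.h>

#define CACHELINE_BYTES		64
#define CLFLUSH_NS_PER_LINE	10ULL			/* assumed */
#define WBINVD_NS		(2 * 1000 * 1000ULL)	/* assumed */

/* wbinvd pays off once the per-line clflush walk would cost more overall. */
static inline bool wbinvd_is_cheaper(size_t bytes)
{
	u64 lines = DIV_ROUND_UP((u64)bytes, CACHELINE_BYTES);

	return lines * CLFLUSH_NS_PER_LINE > WBINVD_NS;
}

With these made-up constants the crossover sits near 12 MiB. Whatever the
measured values, the model also shows what a size threshold cannot express:
the machine-wide cost of evicting everyone else's cachelines, which is the
core of Chris's objection above.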