Message ID | 1471014450-21020-10-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Aug 12, 2016 at 04:07:30PM +0100, Chris Wilson wrote: > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 2fe88d930ca7..8dcdc27afe80 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -715,18 +715,13 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data) > struct drm_device *dev = node->minor->dev; > struct drm_i915_private *dev_priv = to_i915(dev); > struct intel_engine_cs *engine; > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > for_each_engine(engine, dev_priv) > i915_ring_seqno_info(m, engine); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); .... Oh noes, rebase damage. /o\ -Chris
On 12 August 2016 at 16:07, Chris Wilson <chris@chris-wilson.co.uk> wrote: > If we need to use clflush to prepare our batch for reads from memory, we > can bypass the cache instead by using non-temporal copies. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_cmd_parser.c | 58 ++++++++++++++++++++++------------ > drivers/gpu/drm/i915/i915_debugfs.c | 24 -------------- > drivers/gpu/drm/i915/i915_drv.c | 19 ----------- > drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++++------------ > drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++--- > drivers/gpu/drm/i915/i915_gem_tiling.c | 4 --- > drivers/gpu/drm/i915/i915_irq.c | 2 -- > drivers/gpu/drm/i915/intel_uncore.c | 6 ++-- > 8 files changed, 81 insertions(+), 97 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c > index cea3ef7299cc..3244ef1401ad 100644 > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c > @@ -969,8 +969,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, > { > unsigned int src_needs_clflush; > unsigned int dst_needs_clflush; > - void *dst, *ptr; > - int offset, n; > + void *dst; > int ret; > > ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush); > @@ -987,24 +986,43 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, > if (IS_ERR(dst)) > goto unpin_dst; > > - ptr = dst; > - offset = offset_in_page(batch_start_offset); > - if (dst_needs_clflush & CLFLUSH_BEFORE) > - batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size); > - > - for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { > - int len = min_t(int, batch_len, PAGE_SIZE - offset); > - void *vaddr; > - > - vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n)); > - if (src_needs_clflush) > - drm_clflush_virt_range(vaddr + offset, len); > - memcpy(ptr, vaddr + offset, len); > - kunmap_atomic(vaddr); > - > - ptr += len; > - batch_len -= len; > - 
offset = 0; > + if (src_needs_clflush && > + i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) { > + void *src; > + > + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); > + if (IS_ERR(src)) > + goto shmem_copy; > + > + i915_memcpy_from_wc(dst, > + src + batch_start_offset, > + ALIGN(batch_len, 16)); > + i915_gem_object_unpin_map(src_obj); > + } else { > + void *ptr; > + int offset, n; > + > +shmem_copy: I think Joonas may shed another tear at the sight of this :) > + offset = offset_in_page(batch_start_offset); > + if (dst_needs_clflush & CLFLUSH_BEFORE) > + batch_len = roundup(batch_len, > + boot_cpu_data.x86_clflush_size); > + > + ptr = dst; > + for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { > + int len = min_t(int, batch_len, PAGE_SIZE - offset); > + void *vaddr; > + > + vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n)); > + if (src_needs_clflush) > + drm_clflush_virt_range(vaddr + offset, len); > + memcpy(ptr, vaddr + offset, len); > + kunmap_atomic(vaddr); > + > + ptr += len; > + batch_len -= len; > + offset = 0; > + } > } > Disregarding the rest, which seems unrelated to this patch. 
Reviewed-by: Matthew Auld <matthew.auld@intel.com> > /* dst_obj is returned with vmap pinned */ > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 2fe88d930ca7..8dcdc27afe80 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -715,18 +715,13 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data) > struct drm_device *dev = node->minor->dev; > struct drm_i915_private *dev_priv = to_i915(dev); > struct intel_engine_cs *engine; > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > for_each_engine(engine, dev_priv) > i915_ring_seqno_info(m, engine); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > return 0; > } > @@ -1379,11 +1374,7 @@ static int ironlake_drpc_info(struct seq_file *m) > struct drm_i915_private *dev_priv = to_i915(dev); > u32 rgvmodectl, rstdbyctl; > u16 crstandvid; > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > rgvmodectl = I915_READ(MEMMODECTL); > @@ -1391,7 +1382,6 @@ static int ironlake_drpc_info(struct seq_file *m) > crstandvid = I915_READ16(CRSTANDVID); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); > seq_printf(m, "Boost freq: %d\n", > @@ -2179,11 +2169,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) > struct drm_info_node *node = m->private; > struct drm_device *dev = node->minor->dev; > struct drm_i915_private *dev_priv = to_i915(dev); > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > seq_printf(m, "bit6 swizzle for X-tiling = %s\n", > @@ -2223,7 +2209,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) > seq_puts(m, 
"L-shaped memory detected\n"); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > return 0; > } > @@ -4729,13 +4714,9 @@ i915_wedged_set(void *data, u64 val) > if (i915_reset_in_progress(&dev_priv->gpu_error)) > return -EAGAIN; > > - intel_runtime_pm_get(dev_priv); > - > i915_handle_error(dev_priv, val, > "Manually setting wedged to %llu", val); > > - intel_runtime_pm_put(dev_priv); > - > return 0; > } > > @@ -4976,20 +4957,15 @@ i915_cache_sharing_get(void *data, u64 *val) > struct drm_device *dev = data; > struct drm_i915_private *dev_priv = to_i915(dev); > u32 snpcr; > - int ret; > > if (!(IS_GEN6(dev) || IS_GEN7(dev))) > return -ENODEV; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev_priv->drm.struct_mutex); > > *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index c040c6329804..b458faa0d349 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -2293,24 +2293,6 @@ static int intel_runtime_suspend(struct device *device) > > DRM_DEBUG_KMS("Suspending device\n"); > > - /* > - * We could deadlock here in case another thread holding struct_mutex > - * calls RPM suspend concurrently, since the RPM suspend will wait > - * first for this RPM suspend to finish. In this case the concurrent > - * RPM resume will be followed by its RPM suspend counterpart. Still > - * for consistency return -EAGAIN, which will reschedule this suspend. > - */ > - if (!mutex_trylock(&dev->struct_mutex)) { > - DRM_DEBUG_KMS("device lock contention, deffering suspend\n"); > - /* > - * Bump the expiration timestamp, otherwise the suspend won't > - * be rescheduled. 
> - */ > - pm_runtime_mark_last_busy(device); > - > - return -EAGAIN; > - } > - > disable_rpm_wakeref_asserts(dev_priv); > > /* > @@ -2318,7 +2300,6 @@ static int intel_runtime_suspend(struct device *device) > * an RPM reference. > */ > i915_gem_release_all_mmaps(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > intel_guc_suspend(dev); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 5c1acfc10bc4..a26bfd7d6aab 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1434,11 +1434,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > if (ret) > goto err; > > - intel_runtime_pm_get(dev_priv); > - > ret = i915_mutex_lock_interruptible(dev); > if (ret) > - goto err_rpm; > + goto err; > > ret = -EFAULT; > /* We can only do the GTT pwrite on untiled buffers, as otherwise > @@ -1449,7 +1447,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > */ > if (!i915_gem_object_has_struct_page(obj) || > cpu_write_needs_clflush(obj)) { > + intel_runtime_pm_get(dev_priv); > ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); > + intel_runtime_pm_put(dev_priv); > /* Note that the gtt paths might fail with non-page-backed user > * pointers (e.g. gtt mappings when moving data between > * textures). Fallback to the shmem path in that case. */ > @@ -1464,12 +1464,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > > i915_gem_object_put(obj); > mutex_unlock(&dev->struct_mutex); > - intel_runtime_pm_put(dev_priv); > - > return ret; > > -err_rpm: > - intel_runtime_pm_put(dev_priv); > err: > i915_gem_object_put_unlocked(obj); > return ret; > @@ -1833,9 +1829,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) > /* Serialisation between user GTT access and our code depends upon > * revoking the CPU's PTE whilst the mutex is held. The next user > * pagefault then has to wait until we release the mutex. 
> + * > + * Note that RPM complicates somewhat by adding an additional > + * requirement that operations to the GGTT be made holding the RPM > + * wakeref. This in turns allow us to release the mmap from within > + * the RPM suspend code ignoring the struct_mutex serialisation in > + * lieu of the RPM barriers. > */ > - lockdep_assert_held(&obj->base.dev->struct_mutex); > - > if (!obj->fault_mappable) > return; > > @@ -1854,11 +1854,21 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) > obj->fault_mappable = false; > } > > +static void assert_rpm_release_all_mmaps(struct drm_i915_private *dev_priv) > +{ > + assert_rpm_wakelock_held(dev_priv); > +} > + > void > i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) > { > struct drm_i915_gem_object *obj; > > + /* This should only be called by RPM as we require the bound_list > + * to be protected by the RPM barriers and not struct_mutex. > + * We check that we are holding the wakeref whenever we manipulate > + * the dev_priv->mm.bound_list (via assert_rpm_release_all_mmaps). > + */ > list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) > i915_gem_release_mmap(obj); > } > @@ -2402,9 +2412,11 @@ i915_gem_object_retire__read(struct i915_gem_active *active, > * so that we don't steal from recently used but inactive objects > * (unless we are forced to ofc!) > */ > - if (obj->bind_count) > + if (obj->bind_count) { > + assert_rpm_release_all_mmaps(request->i915); > list_move_tail(&obj->global_list, > &request->i915->mm.bound_list); > + } > > if (i915_gem_object_has_active_reference(obj)) { > i915_gem_object_clear_active_reference(obj); > @@ -2881,9 +2893,11 @@ int i915_vma_unbind(struct i915_vma *vma) > > /* Since the unbound list is global, only move to that list if > * no more VMAs exist. 
*/ > - if (--obj->bind_count == 0) > + if (--obj->bind_count == 0) { > + assert_rpm_release_all_mmaps(to_i915(obj->base.dev)); > list_move_tail(&obj->global_list, > &to_i915(obj->base.dev)->mm.unbound_list); > + } > > /* And finally now the object is completely decoupled from this vma, > * we can drop its hold on the backing storage and allow it to be > @@ -3071,6 +3085,7 @@ search_free: > } > GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); > > + assert_rpm_release_all_mmaps(dev_priv); > list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); > list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > obj->bind_count++; > @@ -3420,7 +3435,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, > int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > - struct drm_i915_private *dev_priv = to_i915(dev); > struct drm_i915_gem_caching *args = data; > struct drm_i915_gem_object *obj; > enum i915_cache_level level; > @@ -3449,11 +3463,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > return -EINVAL; > } > > - intel_runtime_pm_get(dev_priv); > - > ret = i915_mutex_lock_interruptible(dev); > if (ret) > - goto rpm_put; > + return ret; > > obj = i915_gem_object_lookup(file, args->handle); > if (!obj) { > @@ -3462,13 +3474,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > } > > ret = i915_gem_object_set_cache_level(obj, level); > - > i915_gem_object_put(obj); > unlock: > mutex_unlock(&dev->struct_mutex); > -rpm_put: > - intel_runtime_pm_put(dev_priv); > - > return ret; > } > > @@ -4174,8 +4182,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) > > kfree(obj->bit_17); > i915_gem_object_free(obj); > - > - intel_runtime_pm_put(dev_priv); > } > > void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index fe7f9887ee67..67a3ff960b0d 
100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2594,6 +2594,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags) > { > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > struct drm_i915_gem_object *obj = vma->obj; > u32 pte_flags = 0; > int ret; > @@ -2606,8 +2607,10 @@ static int ggtt_bind_vma(struct i915_vma *vma, > if (obj->gt_ro) > pte_flags |= PTE_READ_ONLY; > > + intel_runtime_pm_get(i915); > vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, > cache_level, pte_flags); > + intel_runtime_pm_get(i915); > > /* > * Without aliasing PPGTT there's no difference between > @@ -2623,6 +2626,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags) > { > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > u32 pte_flags; > int ret; > > @@ -2637,14 +2641,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > > > if (flags & I915_VMA_GLOBAL_BIND) { > + intel_runtime_pm_get(i915); > vma->vm->insert_entries(vma->vm, > vma->pages, vma->node.start, > cache_level, pte_flags); > + intel_runtime_pm_put(i915); > } > > if (flags & I915_VMA_LOCAL_BIND) { > - struct i915_hw_ppgtt *appgtt = > - to_i915(vma->vm->dev)->mm.aliasing_ppgtt; > + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; > appgtt->base.insert_entries(&appgtt->base, > vma->pages, vma->node.start, > cache_level, pte_flags); > @@ -2655,13 +2660,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > > static void ggtt_unbind_vma(struct i915_vma *vma) > { > - struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; > const u64 size = min(vma->size, vma->node.size); > > - if (vma->flags & I915_VMA_GLOBAL_BIND) > + if (vma->flags & I915_VMA_GLOBAL_BIND) { > + intel_runtime_pm_get(i915); > 
vma->vm->clear_range(vma->vm, > vma->node.start, size, > true); > + intel_runtime_pm_put(i915); > + } > > if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) > appgtt->base.clear_range(&appgtt->base, > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c > index a14b1e3d4c78..08f796a4f5f6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c > @@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, > return -EINVAL; > } > > - intel_runtime_pm_get(dev_priv); > - > mutex_lock(&dev->struct_mutex); > if (obj->pin_display || obj->framebuffer_references) { > err = -EBUSY; > @@ -301,8 +299,6 @@ err: > i915_gem_object_put(obj); > mutex_unlock(&dev->struct_mutex); > > - intel_runtime_pm_put(dev_priv); > - > return err; > } > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index ebb83d5a448b..3d9c2a21dfbd 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2523,7 +2523,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) > * simulated reset via debugs, so get an RPM reference. 
> */ > intel_runtime_pm_get(dev_priv); > - > intel_prepare_reset(dev_priv); > > /* > @@ -2535,7 +2534,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) > ret = i915_reset(dev_priv); > > intel_finish_reset(dev_priv); > - > intel_runtime_pm_put(dev_priv); > > if (ret == 0) > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 43f833901b8e..a6b04da4bf21 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -1414,7 +1414,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, > struct register_whitelist const *entry = whitelist; > unsigned size; > i915_reg_t offset_ldw, offset_udw; > - int i, ret = 0; > + int i, ret; > > for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { > if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) && > @@ -1436,6 +1436,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, > > intel_runtime_pm_get(dev_priv); > > + ret = 0; > switch (size) { > case 8 | 1: > reg->val = I915_READ64_2x32(offset_ldw, offset_udw); > @@ -1454,10 +1455,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, > break; > default: > ret = -EINVAL; > - goto out; > + break; > } > > -out: > intel_runtime_pm_put(dev_priv); > return ret; > } > -- > 2.8.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index cea3ef7299cc..3244ef1401ad 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -969,8 +969,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, { unsigned int src_needs_clflush; unsigned int dst_needs_clflush; - void *dst, *ptr; - int offset, n; + void *dst; int ret; ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush); @@ -987,24 +986,43 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, if (IS_ERR(dst)) goto unpin_dst; - ptr = dst; - offset = offset_in_page(batch_start_offset); - if (dst_needs_clflush & CLFLUSH_BEFORE) - batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size); - - for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { - int len = min_t(int, batch_len, PAGE_SIZE - offset); - void *vaddr; - - vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n)); - if (src_needs_clflush) - drm_clflush_virt_range(vaddr + offset, len); - memcpy(ptr, vaddr + offset, len); - kunmap_atomic(vaddr); - - ptr += len; - batch_len -= len; - offset = 0; + if (src_needs_clflush && + i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) { + void *src; + + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (IS_ERR(src)) + goto shmem_copy; + + i915_memcpy_from_wc(dst, + src + batch_start_offset, + ALIGN(batch_len, 16)); + i915_gem_object_unpin_map(src_obj); + } else { + void *ptr; + int offset, n; + +shmem_copy: + offset = offset_in_page(batch_start_offset); + if (dst_needs_clflush & CLFLUSH_BEFORE) + batch_len = roundup(batch_len, + boot_cpu_data.x86_clflush_size); + + ptr = dst; + for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { + int len = min_t(int, batch_len, PAGE_SIZE - offset); + void *vaddr; + + vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(vaddr + offset, len); + memcpy(ptr, vaddr 
+ offset, len); + kunmap_atomic(vaddr); + + ptr += len; + batch_len -= len; + offset = 0; + } } /* dst_obj is returned with vmap pinned */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2fe88d930ca7..8dcdc27afe80 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -715,18 +715,13 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; - int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); for_each_engine(engine, dev_priv) i915_ring_seqno_info(m, engine); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -1379,11 +1374,7 @@ static int ironlake_drpc_info(struct seq_file *m) struct drm_i915_private *dev_priv = to_i915(dev); u32 rgvmodectl, rstdbyctl; u16 crstandvid; - int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); rgvmodectl = I915_READ(MEMMODECTL); @@ -1391,7 +1382,6 @@ static int ironlake_drpc_info(struct seq_file *m) crstandvid = I915_READ16(CRSTANDVID); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); seq_printf(m, "Boost freq: %d\n", @@ -2179,11 +2169,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", @@ -2223,7 +2209,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) seq_puts(m, "L-shaped memory detected\n"); intel_runtime_pm_put(dev_priv); - 
mutex_unlock(&dev->struct_mutex); return 0; } @@ -4729,13 +4714,9 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_in_progress(&dev_priv->gpu_error)) return -EAGAIN; - intel_runtime_pm_get(dev_priv); - i915_handle_error(dev_priv, val, "Manually setting wedged to %llu", val); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -4976,20 +4957,15 @@ i915_cache_sharing_get(void *data, u64 *val) struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); u32 snpcr; - int ret; if (!(IS_GEN6(dev) || IS_GEN7(dev))) return -ENODEV; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c040c6329804..b458faa0d349 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2293,24 +2293,6 @@ static int intel_runtime_suspend(struct device *device) DRM_DEBUG_KMS("Suspending device\n"); - /* - * We could deadlock here in case another thread holding struct_mutex - * calls RPM suspend concurrently, since the RPM suspend will wait - * first for this RPM suspend to finish. In this case the concurrent - * RPM resume will be followed by its RPM suspend counterpart. Still - * for consistency return -EAGAIN, which will reschedule this suspend. - */ - if (!mutex_trylock(&dev->struct_mutex)) { - DRM_DEBUG_KMS("device lock contention, deffering suspend\n"); - /* - * Bump the expiration timestamp, otherwise the suspend won't - * be rescheduled. - */ - pm_runtime_mark_last_busy(device); - - return -EAGAIN; - } - disable_rpm_wakeref_asserts(dev_priv); /* @@ -2318,7 +2300,6 @@ static int intel_runtime_suspend(struct device *device) * an RPM reference. 
*/ i915_gem_release_all_mmaps(dev_priv); - mutex_unlock(&dev->struct_mutex); intel_guc_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5c1acfc10bc4..a26bfd7d6aab 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1434,11 +1434,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret) goto err; - intel_runtime_pm_get(dev_priv); - ret = i915_mutex_lock_interruptible(dev); if (ret) - goto err_rpm; + goto err; ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise @@ -1449,7 +1447,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, */ if (!i915_gem_object_has_struct_page(obj) || cpu_write_needs_clflush(obj)) { + intel_runtime_pm_get(dev_priv); ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); + intel_runtime_pm_put(dev_priv); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. */ @@ -1464,12 +1464,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); - intel_runtime_pm_put(dev_priv); - return ret; -err_rpm: - intel_runtime_pm_put(dev_priv); err: i915_gem_object_put_unlocked(obj); return ret; @@ -1833,9 +1829,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. + * + * Note that RPM complicates somewhat by adding an additional + * requirement that operations to the GGTT be made holding the RPM + * wakeref. This in turns allow us to release the mmap from within + * the RPM suspend code ignoring the struct_mutex serialisation in + * lieu of the RPM barriers. 
*/ - lockdep_assert_held(&obj->base.dev->struct_mutex); - if (!obj->fault_mappable) return; @@ -1854,11 +1854,21 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) obj->fault_mappable = false; } +static void assert_rpm_release_all_mmaps(struct drm_i915_private *dev_priv) +{ + assert_rpm_wakelock_held(dev_priv); +} + void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj; + /* This should only be called by RPM as we require the bound_list + * to be protected by the RPM barriers and not struct_mutex. + * We check that we are holding the wakeref whenever we manipulate + * the dev_priv->mm.bound_list (via assert_rpm_release_all_mmaps). + */ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) i915_gem_release_mmap(obj); } @@ -2402,9 +2412,11 @@ i915_gem_object_retire__read(struct i915_gem_active *active, * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ - if (obj->bind_count) + if (obj->bind_count) { + assert_rpm_release_all_mmaps(request->i915); list_move_tail(&obj->global_list, &request->i915->mm.bound_list); + } if (i915_gem_object_has_active_reference(obj)) { i915_gem_object_clear_active_reference(obj); @@ -2881,9 +2893,11 @@ int i915_vma_unbind(struct i915_vma *vma) /* Since the unbound list is global, only move to that list if * no more VMAs exist. 
*/ - if (--obj->bind_count == 0) + if (--obj->bind_count == 0) { + assert_rpm_release_all_mmaps(to_i915(obj->base.dev)); list_move_tail(&obj->global_list, &to_i915(obj->base.dev)->mm.unbound_list); + } /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -3071,6 +3085,7 @@ search_free: } GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); + assert_rpm_release_all_mmaps(dev_priv); list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); obj->bind_count++; @@ -3420,7 +3435,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; enum i915_cache_level level; @@ -3449,11 +3463,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - intel_runtime_pm_get(dev_priv); - ret = i915_mutex_lock_interruptible(dev); if (ret) - goto rpm_put; + return ret; obj = i915_gem_object_lookup(file, args->handle); if (!obj) { @@ -3462,13 +3474,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, } ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); -rpm_put: - intel_runtime_pm_put(dev_priv); - return ret; } @@ -4174,8 +4182,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) kfree(obj->bit_17); i915_gem_object_free(obj); - - intel_runtime_pm_put(dev_priv); } void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fe7f9887ee67..67a3ff960b0d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2594,6 +2594,7 @@ static int 
ggtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { + struct drm_i915_private *i915 = to_i915(vma->vm->dev); struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags = 0; int ret; @@ -2606,8 +2607,10 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; + intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); + intel_runtime_pm_get(i915); /* * Without aliasing PPGTT there's no difference between @@ -2623,6 +2626,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { + struct drm_i915_private *i915 = to_i915(vma->vm->dev); u32 pte_flags; int ret; @@ -2637,14 +2641,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_GLOBAL_BIND) { + intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); + intel_runtime_pm_put(i915); } if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = - to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, vma->pages, vma->node.start, cache_level, pte_flags); @@ -2655,13 +2660,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { - struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + struct drm_i915_private *i915 = to_i915(vma->vm->dev); + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; const u64 size = min(vma->size, vma->node.size); - if (vma->flags & I915_VMA_GLOBAL_BIND) + if (vma->flags & I915_VMA_GLOBAL_BIND) { + intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, size, true); + intel_runtime_pm_put(i915); + } if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c 
b/drivers/gpu/drm/i915/i915_gem_tiling.c index a14b1e3d4c78..08f796a4f5f6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; } - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev->struct_mutex); if (obj->pin_display || obj->framebuffer_references) { err = -EBUSY; @@ -301,8 +299,6 @@ err: i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); - intel_runtime_pm_put(dev_priv); - return err; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ebb83d5a448b..3d9c2a21dfbd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2523,7 +2523,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) * simulated reset via debugs, so get an RPM reference. */ intel_runtime_pm_get(dev_priv); - intel_prepare_reset(dev_priv); /* @@ -2535,7 +2534,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) ret = i915_reset(dev_priv); intel_finish_reset(dev_priv); - intel_runtime_pm_put(dev_priv); if (ret == 0) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 43f833901b8e..a6b04da4bf21 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1414,7 +1414,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct register_whitelist const *entry = whitelist; unsigned size; i915_reg_t offset_ldw, offset_udw; - int i, ret = 0; + int i, ret; for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) && @@ -1436,6 +1436,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, intel_runtime_pm_get(dev_priv); + ret = 0; switch (size) { case 8 | 1: reg->val = I915_READ64_2x32(offset_ldw, offset_udw); @@ -1454,10 +1455,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, break; default: ret = -EINVAL; - goto 
out; + break; } -out: intel_runtime_pm_put(dev_priv); return ret; }
If we need to use clflush to prepare our batch for reads from memory, we can bypass the cache instead by using non-temporal copies. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_cmd_parser.c | 58 ++++++++++++++++++++++------------ drivers/gpu/drm/i915/i915_debugfs.c | 24 -------------- drivers/gpu/drm/i915/i915_drv.c | 19 ----------- drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++--- drivers/gpu/drm/i915/i915_gem_tiling.c | 4 --- drivers/gpu/drm/i915/i915_irq.c | 2 -- drivers/gpu/drm/i915/intel_uncore.c | 6 ++-- 8 files changed, 81 insertions(+), 97 deletions(-)