From patchwork Tue Jan  8 10:53:10 2013
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 1945281
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Tue, 8 Jan 2013 10:53:10 +0000
Message-Id: <1357642399-7678-3-git-send-email-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 1.7.10.4
In-Reply-To: <1357642399-7678-1-git-send-email-chris@chris-wilson.co.uk>
References: <1357642399-7678-1-git-send-email-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 02/11] drm/i915: Avoid forcing relocations through the mappable GTT or CPU

If the object lies outside of the mappable GTT aperture, do not force
it through the CPU domain for relocations, but simply flush the writes
as we perform them and then queue a chipset flush.
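For reference, the write path after this patch boils down to the sketch
below. write_reloc() is not a function in the driver, only a condensed
stand-in for the relevant part of i915_gem_execbuffer_relocate_entry();
the domain set-up, fence handling and error paths are omitted, and
reloc->offset is assumed to already hold the absolute GTT address of
the dword being patched:

static void
write_reloc(struct drm_i915_private *dev_priv,
	    struct drm_i915_gem_object *obj,
	    struct drm_i915_gem_relocation_entry *reloc)
{
	unsigned page_offset = offset_in_page(reloc->offset);

	if (reloc->offset < dev_priv->mm.gtt_mappable_end) {
		/* Reachable through the aperture: keep writing via a
		 * WC mapping of the GTT, as before. */
		void __iomem *reloc_page =
			io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
						 reloc->offset & PAGE_MASK);

		iowrite32(reloc->delta, reloc_page + page_offset);
		io_mapping_unmap_atomic(reloc_page);
	} else {
		/* Beyond the mappable end: kmap the backing page and
		 * clflush around the write instead of bouncing the whole
		 * object into the CPU domain. */
		char *vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));

		drm_clflush_virt_range(vaddr + page_offset, 4);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		drm_clflush_virt_range(vaddr + page_offset, 4);
		kunmap_atomic(vaddr);

		/* Note for move_to_gpu() to emit a chipset flush later. */
		obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;
	}
}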
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 163bb52..daa5409 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,6 +33,9 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
+#define __EXEC_OBJECT_HAS_PIN (1<<31)
+#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -95,10 +98,16 @@ eb_destroy(struct eb_objects *eb)
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
 
+static inline struct page *
+gtt_offset_to_page(struct drm_i915_gem_object *obj, u32 offset)
+{
+	offset -= obj->gtt_space->start;
+	return i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_objects *eb,
@@ -182,22 +191,20 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	reloc->delta += target_offset;
+	reloc->offset += obj->gtt_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 		char *vaddr;
 
-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			return ret;
 
-		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-							     reloc->offset >> PAGE_SHIFT));
-		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+		vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+		*(uint32_t *)(vaddr + offset_in_page(reloc->offset)) = reloc->delta;
 		kunmap_atomic(vaddr);
 	} else {
 		struct drm_i915_private *dev_priv = dev->dev_private;
-		uint32_t __iomem *reloc_entry;
-		void __iomem *reloc_page;
+		unsigned page_offset;
 
 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 		if (ret)
@@ -208,13 +215,28 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			return ret;
 
 		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
-		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-						      reloc->offset & PAGE_MASK);
-		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
-		iowrite32(reloc->delta, reloc_entry);
-		io_mapping_unmap_atomic(reloc_page);
+		page_offset = offset_in_page(reloc->offset);
+
+		if (reloc->offset < dev_priv->mm.gtt_mappable_end) {
+			void __iomem *reloc_page;
+
+			reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+							      reloc->offset & PAGE_MASK);
+			iowrite32(reloc->delta, reloc_page + page_offset);
+			io_mapping_unmap_atomic(reloc_page);
+		} else {
+			char *vaddr;
+
+			vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+			*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+
+			kunmap_atomic(vaddr);
+
+			obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;
+		}
 	}
 
 	/* and update the user's relocation entry */
@@ -312,16 +334,6 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
-#define __EXEC_OBJECT_HAS_PIN (1<<31)
-#define __EXEC_OBJECT_HAS_FENCE (1<<30)
-
-static int
-need_reloc_mappable(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(obj);
-}
-
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 				   struct intel_ring_buffer *ring)
@@ -329,16 +341,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence, need_mappable;
+	bool need_fence;
 	int ret;
 
 	need_fence =
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(obj);
 
-	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
+	ret = i915_gem_object_pin(obj, entry->alignment, need_fence, false);
 	if (ret)
 		return ret;
 
@@ -401,7 +412,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	INIT_LIST_HEAD(&ordered_objects);
 	while (!list_empty(objects)) {
 		struct drm_i915_gem_exec_object2 *entry;
-		bool need_fence, need_mappable;
+		bool need_fence;
 
 		obj = list_first_entry(objects,
 				       struct drm_i915_gem_object,
@@ -412,9 +423,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
-		need_mappable = need_fence || need_reloc_mappable(obj);
 
-		if (need_mappable)
+		if (need_fence)
 			list_move(&obj->exec_list, &ordered_objects);
 		else
 			list_move_tail(&obj->exec_list, &ordered_objects);
@@ -444,7 +454,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence, need_mappable;
+			bool need_fence;
 
 			if (!obj->gtt_space)
 				continue;
@@ -453,10 +463,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 				has_fenced_gpu_access &&
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(obj);
 
 			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			    (need_fence && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
 				ret = i915_gem_execbuffer_reserve_object(obj, ring);
@@ -603,10 +612,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		if (ret)
 			return ret;
 
+		flush_domains |= obj->base.write_domain;
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj);
 
-		flush_domains |= obj->base.write_domain;
+		/* Used as an internal marker during relocation processing */
+		if (obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS) {
+			flush_domains |= obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS;
+			obj->base.pending_write_domain &= I915_GEM_GPU_DOMAINS;
+		}
 	}
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
@@ -923,7 +938,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (batch_obj->base.pending_write_domain & I915_GEM_GPU_DOMAINS) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
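
The I915_GEM_DOMAIN_CPU bit set on pending_write_domain during
relocation is only a transient marker: move_to_gpu() folds it into
flush_domains and clears it again before the batch is queued, which is
also why the self-modifying-batch check above now masks with
I915_GEM_GPU_DOMAINS. The flush itself is the pre-existing one just
below the quoted context, roughly (sketch, assuming the 3.8-era code):

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);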