From patchwork Mon Dec 3 11:49:12 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 1832771 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork2.kernel.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by patchwork2.kernel.org (Postfix) with ESMTP id 78C9BDF2F9 for ; Mon, 3 Dec 2012 11:56:39 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8A3B1E5CE2 for ; Mon, 3 Dec 2012 03:56:39 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from relay.fireflyinternet.com (relay1.fireflyinternet.com [217.160.24.105]) by gabe.freedesktop.org (Postfix) with ESMTP id A567FE6013 for ; Mon, 3 Dec 2012 03:50:49 -0800 (PST) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=109.228.6.235; Received: from fireflyinternet.com (unverified [109.228.6.235]) by relay.fireflyinternet.com (FireflyRelay1) with ESMTP id 782843-2000100 for ; Mon, 03 Dec 2012 11:52:47 +0000 X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.73.22; Received: from arrandale.alporthouse.com (unverified [78.156.73.22]) by fireflyinternet.com (Firefly Internet SMTP) with ESMTP id 125476770-1500050 for multiple; Mon, 03 Dec 2012 11:50:44 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 3 Dec 2012 11:49:12 +0000 Message-Id: <1354535352-3506-15-git-send-email-chris@chris-wilson.co.uk> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk> References: <1354535352-3506-1-git-send-email-chris@chris-wilson.co.uk> X-Originating-IP: 78.156.73.22 Subject: [Intel-gfx] [PATCH 14/14] drm/i915: Allow userspace to request an object at a specific offset X-BeenThere: 
intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Certain workarounds and workloads require objects at specific or at least known offsets. Privileged users could pin an object into the GTT, but that has obvious limitations for the general case. Instead, the user can construct a batch assuming a particular layout for an object and request that the kernel try its utmost to provide the object at that location. This has the advantage that not only can it fail, but also such allocations are transitory - although contention should be rare and the object should persist at the same location between batches. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 6 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 103 +++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 3 +- 4 files changed, 109 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd67f94..931d722 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1394,6 +1394,10 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); void i915_gem_free_object(struct drm_gem_object *obj); +bool i915_gem_valid_gtt_space(struct drm_device *dev, + struct drm_mm_node *gtt_space, + unsigned long cache_level); + int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, uint32_t alignment, bool map_and_fenceable, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 86d549b..a014784 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ 
b/drivers/gpu/drm/i915/i915_gem.c @@ -2815,9 +2815,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } -static bool i915_gem_valid_gtt_space(struct drm_device *dev, - struct drm_mm_node *gtt_space, - unsigned long cache_level) +bool i915_gem_valid_gtt_space(struct drm_device *dev, + struct drm_mm_node *gtt_space, + unsigned long cache_level) { struct drm_mm_node *other; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 17b09bd..3fc07ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -405,6 +405,90 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, return ret; } +static struct drm_mm_node * +get_pinned_block(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mm_node *gtt; + + gtt = drm_mm_create_block(&dev_priv->mm.gtt_space, + obj->exec_entry->offset, + i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode), + false); + if (gtt == NULL) + return NULL; + + if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) { + drm_mm_put_block(gtt); + return NULL; + } + + gtt->color = obj->cache_level; + return gtt; +} + +static int +i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + struct drm_mm_node *gtt; + int ret; + + if (obj->gtt_offset == entry->offset) + return 0; + + if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) + return -EINVAL; + + if (entry->alignment && entry->offset & (entry->alignment - 1)) + return -EINVAL; + + i915_gem_object_pin_pages(obj); + + ret = i915_gem_object_unbind(obj); + if (ret) + goto unpin_pages; + + gtt = get_pinned_block(obj); + if (gtt == NULL) { + ret = 
i915_gem_evict_everything(dev); + if (ret) + goto unpin_pages; + + gtt = get_pinned_block(obj); + } + if (gtt == NULL) { + ret = -EBUSY; + goto unpin_pages; + } + + ret = i915_gem_gtt_prepare_object(obj); + if (ret) { + drm_mm_put_block(gtt); + goto unpin_pages; + } + + list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); + list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); + + obj->gtt_space = gtt; + obj->gtt_offset += gtt->start; + + obj->map_and_fenceable = + obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; + trace_i915_gem_object_bind(obj, false); + + if (!dev_priv->mm.aliasing_ppgtt) + i915_gem_gtt_bind_object(obj, obj->cache_level); + +unpin_pages: + i915_gem_object_unpin_pages(obj); + return ret; +} + static int i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, @@ -416,6 +500,12 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, bool need_fence; int ret; + if (entry->flags & EXEC_OBJECT_PINNED) { + ret = i915_gem_execbuffer_pinned_object(obj); + if (ret) + return ret; + } + need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && @@ -427,6 +517,10 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, entry->flags |= __EXEC_OBJECT_HAS_PIN; + if (entry->flags & EXEC_OBJECT_PINNED && + obj->gtt_offset != entry->offset) + return -EINVAL; + if (has_fenced_gpu_access) { if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { ret = i915_gem_object_get_fence(obj); @@ -489,11 +583,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, bool *need_relocs) { struct drm_i915_gem_object *obj; - struct list_head ordered_objects; + struct list_head ordered_objects, pinned_objects; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; INIT_LIST_HEAD(&ordered_objects); + INIT_LIST_HEAD(&pinned_objects); while (!list_empty(objects)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence; @@ -507,8 +602,9 @@ 
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - - if (need_fence) + if (entry->flags & EXEC_OBJECT_PINNED) + list_move(&obj->exec_list, &pinned_objects); + else if (need_fence) list_move(&obj->exec_list, &ordered_objects); else list_move_tail(&obj->exec_list, &ordered_objects); @@ -517,6 +613,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, obj->base.pending_write_domain = 0; obj->pending_fenced_gpu_access = false; } + list_splice(&pinned_objects, &ordered_objects); list_splice(&ordered_objects, objects); /* Attempt to pin all of the buffers into the GTT. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6210872..525ab30 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -631,7 +631,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_PINNED (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) __u64 flags; __u64 rsvd1;