From patchwork Sat Nov 10 15:15:47 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 1723841 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by patchwork1.kernel.org (Postfix) with ESMTP id 6174A3FC8F for ; Sat, 10 Nov 2012 15:20:21 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3AFBC9EB22 for ; Sat, 10 Nov 2012 07:20:21 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from relay.fireflyinternet.com (relay1.fireflyinternet.com [217.160.24.105]) by gabe.freedesktop.org (Postfix) with ESMTP id 9903B9E935 for ; Sat, 10 Nov 2012 07:18:50 -0800 (PST) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=109.228.6.235; Received: from fireflyinternet.com (unverified [109.228.6.235]) by relay.fireflyinternet.com (FireflyRelay1) with ESMTP id 47667-2000100 for ; Sat, 10 Nov 2012 15:18:28 +0000 X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.73.22; Received: from arrandale.alporthouse.com (unverified [78.156.73.22]) by fireflyinternet.com (Firefly Internet SMTP) with ESMTP id 125215276-1500050 for multiple; Sat, 10 Nov 2012 15:18:46 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sat, 10 Nov 2012 15:15:47 +0000 Message-Id: <1352560547-1423-4-git-send-email-chris@chris-wilson.co.uk> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1352560547-1423-1-git-send-email-chris@chris-wilson.co.uk> References: <1352560547-1423-1-git-send-email-chris@chris-wilson.co.uk> X-Originating-IP: 78.156.73.22 Subject: [Intel-gfx] [PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 623000.0/sec. i3-330m: 748000.0/sec to 789000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 +++++++++++++++++----------- include/uapi/drm/i915_drm.h | 6 ++ 3 files changed, 63 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a0c4b4f..a35217d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1020,6 +1020,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_NO_RELOC: value = 1; break; + case I915_PARAM_HAS_EXEC_HANDLE_LUT: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 30beea6..9ccd860 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -39,25 +39,40 @@ struct eb_objects { struct list_head objects; - int and; - struct hlist_head buckets[0]; + unsigned int and; + union { + struct drm_i915_gem_object *lut[0]; + struct hlist_head buckets[0]; + }; }; static struct eb_objects * -eb_create(int size) +eb_create(struct drm_i915_gem_execbuffer2 *args) { - struct eb_objects *eb; - int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; - BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); - while (count > size) - count >>= 1; - eb = kzalloc(count*sizeof(struct hlist_head) + - sizeof(struct eb_objects), - GFP_KERNEL); - if (eb == NULL) - return eb; - - eb->and = count - 1; + struct eb_objects *eb = NULL; + + if (args->flags & I915_EXEC_HANDLE_LUT) { + int size = args->buffer_count; + size *= sizeof(struct drm_i915_gem_object); + size += sizeof(struct eb_objects); + eb = kzalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + } + + if (eb == NULL) { + int size = args->buffer_count; + int count = (PAGE_SIZE - sizeof(struct eb_objects)) / sizeof(struct hlist_head); + BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); + while (count > 2*size) + count >>= 1; + eb = kzalloc(count*sizeof(struct hlist_head) + + sizeof(struct eb_objects), + GFP_TEMPORARY); + if (eb == NULL) + return eb; + + eb->and = count - 1; + } + INIT_LIST_HEAD(&eb->objects); return eb; } @@ -65,14 +80,8 @@ eb_create(int size) static void eb_reset(struct eb_objects *eb) { - memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); -} - -static void -eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) -{ - hlist_add_head(&obj->exec_node, - &eb->buckets[obj->exec_handle & eb->and]); + if (eb->and) + memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } static int @@ -105,9 +114,14 @@ eb_lookup_objects(struct eb_objects *eb, drm_gem_object_reference(&obj->base); list_add_tail(&obj->exec_list, &eb->objects); - obj->exec_handle = exec[i].handle; obj->exec_entry = &exec[i]; - eb_add_object(eb, obj); + if (eb->and == 0) { + eb->lut[i] = obj; + } else { + obj->exec_handle = exec[i].handle; + hlist_add_head(&obj->exec_node, + &eb->buckets[exec[i].handle & eb->and]); + } } spin_unlock(&file->table_lock); @@ -117,18 +131,22 @@ eb_lookup_objects(struct eb_objects *eb, static struct drm_i915_gem_object * eb_get_object(struct eb_objects *eb, unsigned long handle) { - struct hlist_head *head; - struct hlist_node *node; - struct drm_i915_gem_object *obj; + if (eb->and == 0) { + return eb->lut[handle]; + } else { + struct hlist_head *head; + struct hlist_node *node; - head = &eb->buckets[handle & eb->and]; - hlist_for_each(node, head) { - obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); - if (obj->exec_handle == handle) - return obj; - } + head = &eb->buckets[handle & eb->and]; + hlist_for_each(node, head) { + struct drm_i915_gem_object *obj; - return NULL; + obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); + if (obj->exec_handle == handle) + return obj; + } + return NULL; + } } static void @@ -988,7 +1006,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - eb = eb_create(args->buffer_count); + eb = eb_create(args); if (eb == NULL) { mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7657d3e..82c1088 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -310,6 +310,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_EXEC_NO_RELOC 24 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 25 typedef struct drm_i915_getparam { int param; @@ -690,6 +691,11 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_NO_RELOC (1<<10) +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1<<11) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK