From patchwork Tue Apr 22 15:16:36 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Abdiel Janulgue X-Patchwork-Id: 4033251 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork2.web.kernel.org (Postfix) with ESMTP id 2463CC0DA2 for ; Tue, 22 Apr 2014 15:13:26 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id AF3A320221 for ; Tue, 22 Apr 2014 15:13:21 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by mail.kernel.org (Postfix) with ESMTP id 7A93720219 for ; Tue, 22 Apr 2014 15:13:16 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id E2FA86E8DD; Tue, 22 Apr 2014 08:13:15 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTP id 0C0E66E8D9 for ; Tue, 22 Apr 2014 08:13:11 -0700 (PDT) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP; 22 Apr 2014 08:07:25 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.97,904,1389772800"; d="scan'208";a="497433960" Received: from abj-desktop.fi.intel.com ([10.237.72.192]) by orsmga001.jf.intel.com with ESMTP; 22 Apr 2014 08:11:02 -0700 From: Abdiel Janulgue To: intel-gfx@lists.freedesktop.org Date: Tue, 22 Apr 2014 18:16:36 +0300 Message-Id: <1398179796-5103-3-git-send-email-abdiel.janulgue@linux.intel.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1398179796-5103-1-git-send-email-abdiel.janulgue@linux.intel.com> References: <1398179796-5103-1-git-send-email-abdiel.janulgue@linux.intel.com> Subject: [Intel-gfx] [RFC PATCH 2/2] xf86-video-intel: 
Enable hw-generated binding tables for UXA X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Spam-Status: No, score=-4.8 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Code is based on my hw-generated binding table code for Mesa adapted to i965_composite path in UXA. Signed-off-by: Abdiel Janulgue --- src/uxa/i965_3d.c | 5 ++- src/uxa/i965_reg.h | 8 +++++ src/uxa/i965_render.c | 78 +++++++++++++++++++++++++++++++++++-------- src/uxa/intel_batchbuffer.c | 7 ++-- 4 files changed, 80 insertions(+), 18 deletions(-) diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c index 757a979..afbb5a7 100644 --- a/src/uxa/i965_3d.c +++ b/src/uxa/i965_3d.c @@ -406,7 +406,10 @@ gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset) { OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); - OUT_BATCH(ps_binding_table_offset); + if (intel->use_resource_streamer) + OUT_BATCH(ps_binding_table_offset >> 1); + else + OUT_BATCH(ps_binding_table_offset); } void diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h index a934a67..157b212 100644 --- a/src/uxa/i965_reg.h +++ b/src/uxa/i965_reg.h @@ -296,6 +296,14 @@ /* DW1 */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +/* GEN7+ resource streamer */ +#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC BRW_3D(3, 1, 0x19) +# define BINDING_TABLE_POOL_ENABLE 0x0860 +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS BRW_3D(3, 0, 0x43) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS BRW_3D(3, 0, 0x44) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS BRW_3D(3, 0, 0x45) +#define 
GEN7_3DSTATE_BINDING_TABLE_EDIT_DS BRW_3D(3, 0, 0x46) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS BRW_3D(3, 0, 0x47) #define PIPELINE_SELECT_3D 0 #define PIPELINE_SELECT_MEDIA 1 diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c index 74f57af..d5225dd 100644 --- a/src/uxa/i965_render.c +++ b/src/uxa/i965_render.c @@ -1783,6 +1783,10 @@ static void i965_surface_flush(struct intel_screen_private *intel) sizeof(intel->surface_data), 4096); assert(intel->surface_bo); + drm_intel_bo_unreference(intel->hw_bt_pool_bo); + intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt", + 131072, 4096); + assert(intel->hw_bt_pool_bo); return; (void)ret; } @@ -2217,32 +2221,70 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) static void i965_bind_surfaces(struct intel_screen_private *intel) { uint32_t *binding_table; + uint32_t surf0 = 0, surf1 = 0, surf2 = 0; assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); - binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); - intel->surface_table = intel->surface_used; - intel->surface_used += SURFACE_STATE_PADDED_SIZE; - - binding_table[0] = - i965_set_picture_surface_state(intel, + surf0 = i965_set_picture_surface_state(intel, intel->render_dest_picture, intel->render_dest, TRUE); - binding_table[1] = - i965_set_picture_surface_state(intel, + surf1 = i965_set_picture_surface_state(intel, intel->render_source_picture, intel->render_source, FALSE); if (intel->render_mask) { - binding_table[2] = - i965_set_picture_surface_state(intel, - intel->render_mask_picture, - intel->render_mask, - FALSE); + surf2 = i965_set_picture_surface_state(intel, + intel->render_mask_picture, + intel->render_mask, + FALSE); + } + + if (intel->use_resource_streamer) { + intel->surface_table += (256 * sizeof(uint16_t)); + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2)); + OUT_BATCH(0x3); + { + OUT_BATCH(0 << 16 | surf0 >> 5); + OUT_BATCH(1 << 16 | surf1 
>> 5); + OUT_BATCH(2 << 16 | surf2 >> 5); + } + } else { + binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); + intel->surface_table = intel->surface_used; + intel->surface_used += SURFACE_STATE_PADDED_SIZE; + + binding_table[0] = surf0; + binding_table[1] = surf1; + binding_table[2] = surf2; } } +static void i965_enable_hw_binding_table(struct intel_screen_private *intel) +{ + if (!intel->use_resource_streamer) + return; + + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2)); + OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + BINDING_TABLE_POOL_ENABLE); + OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + intel->hw_bt_pool_bo->size); + + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_GLOBAL_GTT); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + + /* Do a block clear for existing on-chip binding table entries + that might have stuck from the old batch. Otherwise, this + causes GPU hangs + */ + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (3 - 2)); + OUT_BATCH(0xffff << 16 | 0x3 ); + OUT_BATCH(0); +} + void i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h) @@ -2252,6 +2294,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, intel_batch_start_atomic(scrn, 200); if (intel->needs_render_state_emit) { + i965_enable_hw_binding_table(intel); i965_bind_surfaces(intel); if (INTEL_INFO(intel)->gen >= 060) @@ -2349,6 +2392,8 @@ void gen4_render_state_init(ScrnInfoPtr scrn) drm_intel_bo_alloc(intel->bufmgr, "surface data", sizeof(intel->surface_data), 4096); assert(intel->surface_bo); + intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt", + 131072, 4096); intel->surface_used = 0; @@ -2445,6 +2490,7 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn) int i, j, k, l, m; drm_intel_bo_unreference(intel->surface_bo); + drm_intel_bo_unreference(intel->hw_bt_pool_bo); 
drm_intel_bo_unreference(render_state->vs_state_bo); drm_intel_bo_unreference(render_state->sf_state_bo); drm_intel_bo_unreference(render_state->sf_mask_state_bo); @@ -2571,9 +2617,13 @@ gen6_composite_create_depth_stencil_state(intel_screen_private *intel) (void)ret; } +#define MI_RS_CONTROL (0x6 << 23) + static void gen6_composite_state_base_address(intel_screen_private *intel) { + OUT_BATCH(MI_RS_CONTROL | 0x0); + OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ intel->surface_reloc = intel->batch_used; @@ -2586,6 +2636,8 @@ gen6_composite_state_base_address(intel_screen_private *intel) OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ + + OUT_BATCH(MI_RS_CONTROL | 0x1); } static void diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c index dedf7f8..347413b 100644 --- a/src/uxa/intel_batchbuffer.c +++ b/src/uxa/intel_batchbuffer.c @@ -260,13 +260,12 @@ void intel_batch_submit(ScrnInfoPtr scrn) } ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr); + uint32_t flags = HAS_BLT(intel) ? intel->current_batch: I915_EXEC_DEFAULT; + flags |= intel->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0; if (ret == 0) { ret = drm_intel_bo_mrb_exec(intel->batch_bo, intel->batch_used*4, - NULL, 0, 0xffffffff, - (HAS_BLT(intel) ? - intel->current_batch: - I915_EXEC_DEFAULT)); + NULL, 0, 0xffffffff, flags); } if (ret != 0) {