diff mbox

[RFC,2/2] xf86-video-intel: Enable hw-generated binding tables for UXA

Message ID 1398179796-5103-3-git-send-email-abdiel.janulgue@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Abdiel Janulgue April 22, 2014, 3:16 p.m. UTC
Code is based on my hw-generated binding table code for Mesa,
adapted to the i965_composite path in UXA.

Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 src/uxa/i965_3d.c           |    5 ++-
 src/uxa/i965_reg.h          |    8 +++++
 src/uxa/i965_render.c       |   78 +++++++++++++++++++++++++++++++++++--------
 src/uxa/intel_batchbuffer.c |    7 ++--
 4 files changed, 80 insertions(+), 18 deletions(-)
diff mbox

Patch

diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c
index 757a979..afbb5a7 100644
--- a/src/uxa/i965_3d.c
+++ b/src/uxa/i965_3d.c
@@ -406,7 +406,10 @@  gen7_upload_binding_table(intel_screen_private *intel,
 			  uint32_t ps_binding_table_offset)
 {
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(ps_binding_table_offset);
+	if (intel->use_resource_streamer)
+		OUT_BATCH(ps_binding_table_offset >> 1);
+	else
+		OUT_BATCH(ps_binding_table_offset);
 }
 
 void
diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h
index a934a67..157b212 100644
--- a/src/uxa/i965_reg.h
+++ b/src/uxa/i965_reg.h
@@ -296,6 +296,14 @@ 
 /* DW1 */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
+/* GEN7+ resource streamer */
+#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC           BRW_3D(3, 1, 0x19)
+# define BINDING_TABLE_POOL_ENABLE              0x0860
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS              BRW_3D(3, 0, 0x43)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS              BRW_3D(3, 0, 0x44)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS              BRW_3D(3, 0, 0x45)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_DS              BRW_3D(3, 0, 0x46)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS              BRW_3D(3, 0, 0x47)
 
 #define PIPELINE_SELECT_3D		0
 #define PIPELINE_SELECT_MEDIA		1
diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c
index 74f57af..d5225dd 100644
--- a/src/uxa/i965_render.c
+++ b/src/uxa/i965_render.c
@@ -1783,6 +1783,10 @@  static void i965_surface_flush(struct intel_screen_private *intel)
 				   sizeof(intel->surface_data), 4096);
 	assert(intel->surface_bo);
 
+	drm_intel_bo_unreference(intel->hw_bt_pool_bo);
+	intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+						  131072, 4096);
+	assert(intel->hw_bt_pool_bo);
 	return;
 	(void)ret;
 }
@@ -2217,32 +2221,70 @@  static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 static void i965_bind_surfaces(struct intel_screen_private *intel)
 {
 	uint32_t *binding_table;
+	uint32_t surf0 = 0, surf1 = 0, surf2 = 0; /* values returned by i965_set_picture_surface_state for dest, source, mask; surf2 stays 0 without a mask */
 
 	assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));
 
-	binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
-	intel->surface_table = intel->surface_used;
-	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
-
-	binding_table[0] =
-		i965_set_picture_surface_state(intel,
+	surf0 = i965_set_picture_surface_state(intel,
 					       intel->render_dest_picture,
 					       intel->render_dest,
 					       TRUE);
-	binding_table[1] =
-		i965_set_picture_surface_state(intel,
+	surf1 = i965_set_picture_surface_state(intel,
 					       intel->render_source_picture,
 					       intel->render_source,
 					       FALSE);
 	if (intel->render_mask) {
-		binding_table[2] =
-			i965_set_picture_surface_state(intel,
-						       intel->render_mask_picture,
-						       intel->render_mask,
-						       FALSE);
+		surf2  = i965_set_picture_surface_state(intel,
+							intel->render_mask_picture,
+							intel->render_mask,
+							FALSE);
+	}
+
+	if (intel->use_resource_streamer) { /* resource streamer path: entries are sent as an edit command in the batch */
+		intel->surface_table += (256 * sizeof(uint16_t)); /* advances by 512 bytes; NOTE(review): only ever incremented here -- confirm it is reset elsewhere */
+		OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2)); /* 5 dwords: header, the 0x3 dword, three entries */
+		OUT_BATCH(0x3); /* NOTE(review): magic value, presumably the edit target -- confirm against the PRM */
+		{
+			OUT_BATCH(0 << 16 | surf0 >> 5); /* each entry dword: table index << 16 | surface value >> 5 */
+			OUT_BATCH(1 << 16 | surf1 >> 5);
+			OUT_BATCH(2 << 16 | surf2 >> 5); /* emitted even without a mask (surf2 is then 0) */
+		}
+	} else { /* otherwise the binding table is written into surface_data by the CPU */
+		binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
+		intel->surface_table = intel->surface_used;
+		intel->surface_used += SURFACE_STATE_PADDED_SIZE;
+
+		binding_table[0] = surf0;
+		binding_table[1] = surf1;
+		binding_table[2] = surf2; /* written unconditionally; 0 when there is no mask */
 	}
 }
 
+static void i965_enable_hw_binding_table(struct intel_screen_private *intel)
+{ /* Emits the batch commands that point the hardware at hw_bt_pool_bo and clear old binding table entries. */
+	if (!intel->use_resource_streamer)
+		return; /* nothing is emitted when the resource streamer is not in use */
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2)); /* length covers the header plus the two relocations below */
+	OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+		  BINDING_TABLE_POOL_ENABLE); /* pool bo address with 0x0860 added; presumably enable/control bits -- confirm against the PRM */
+	OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+		  intel->hw_bt_pool_bo->size); /* pool bo address plus its size; presumably the pool upper bound -- confirm */
+
+	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); /* NOTE(review): only the GLOBAL_GTT flag is set, no flush/stall bits -- confirm this is intended */
+	OUT_BATCH(BRW_PIPE_CONTROL_GLOBAL_GTT);
+	OUT_BATCH(0); /* address */
+	OUT_BATCH(0); /* write data */
+
+	/* Do a block clear for existing on-chip binding table entries
+	   that might be left over from the old batch. Otherwise, stale
+	   entries cause GPU hangs.
+	*/
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (3 - 2)); /* 3 dwords: header plus the two dwords below */
+	OUT_BATCH(0xffff << 16 | 0x3 ); /* presumably a 16-bit block-clear mask in the high half; low bits match the 0x3 used in i965_bind_surfaces -- confirm */
+	OUT_BATCH(0);
+}
+
 void
 i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	       int dstX, int dstY, int w, int h)
@@ -2252,6 +2294,7 @@  i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 
 	intel_batch_start_atomic(scrn, 200);
 	if (intel->needs_render_state_emit) {
+		i965_enable_hw_binding_table(intel);
 		i965_bind_surfaces(intel);
 
 		if (INTEL_INFO(intel)->gen >= 060)
@@ -2349,6 +2392,8 @@  void gen4_render_state_init(ScrnInfoPtr scrn)
 		drm_intel_bo_alloc(intel->bufmgr, "surface data",
 				   sizeof(intel->surface_data), 4096);
 	assert(intel->surface_bo);
+	intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+						  131072, 4096);
 
 	intel->surface_used = 0;
 
@@ -2445,6 +2490,7 @@  void gen4_render_state_cleanup(ScrnInfoPtr scrn)
 	int i, j, k, l, m;
 
 	drm_intel_bo_unreference(intel->surface_bo);
+	drm_intel_bo_unreference(intel->hw_bt_pool_bo);
 	drm_intel_bo_unreference(render_state->vs_state_bo);
 	drm_intel_bo_unreference(render_state->sf_state_bo);
 	drm_intel_bo_unreference(render_state->sf_mask_state_bo);
@@ -2571,9 +2617,13 @@  gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
 	(void)ret;
 }
 
+#define MI_RS_CONTROL                           (0x6 << 23)
+
 static void
 gen6_composite_state_base_address(intel_screen_private *intel)
 {
+	OUT_BATCH(MI_RS_CONTROL | 0x0);
+
 	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
 	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
 	intel->surface_reloc = intel->batch_used;
@@ -2586,6 +2636,8 @@  gen6_composite_state_base_address(intel_screen_private *intel)
 	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
 	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
 	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+
+	OUT_BATCH(MI_RS_CONTROL | 0x1);
 }
 
 static void
diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c
index dedf7f8..347413b 100644
--- a/src/uxa/intel_batchbuffer.c
+++ b/src/uxa/intel_batchbuffer.c
@@ -260,13 +260,12 @@  void intel_batch_submit(ScrnInfoPtr scrn)
 	}
 
 	ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
+	uint32_t flags = HAS_BLT(intel) ? intel->current_batch: I915_EXEC_DEFAULT;
+	flags |= intel->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
 	if (ret == 0) {
 		ret = drm_intel_bo_mrb_exec(intel->batch_bo,
 				intel->batch_used*4,
-				NULL, 0, 0xffffffff,
-				(HAS_BLT(intel) ?
-				 intel->current_batch:
-				 I915_EXEC_DEFAULT));
+				NULL, 0, 0xffffffff, flags);
 	}
 
 	if (ret != 0) {