diff mbox

[2/2] use BLT command to accelerate uxa on gen6.

Message ID 1288078396-6002-2-git-send-email-nanhai.zou@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zou, Nanhai Oct. 26, 2010, 7:33 a.m. UTC
None
diff mbox

Patch

diff --git a/src/i830_reg.h b/src/i830_reg.h
index 4080896..93d03cf 100644
--- a/src/i830_reg.h
+++ b/src/i830_reg.h
@@ -32,6 +32,8 @@ 
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
+#define MI_FLUSH_DW			(0x26<<23)
+
 #define MI_WRITE_DIRTY_STATE		(1<<4)
 #define MI_END_SCENE			(1<<3)
 #define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 3b72ba1..cde086b 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -171,6 +171,12 @@  void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx)
 
 		intel_batch_do_flush(scrn, batch_idx);
 	} else {
+		BEGIN_BATCH_BLT(4);
+		OUT_BATCH_BLT(MI_FLUSH_DW | 2);
+		OUT_BATCH_BLT(0);
+		OUT_BATCH_BLT(0);
+		OUT_BATCH_BLT(0);
+		ADVANCE_BATCH_BLT();
 	}
 }
 
@@ -193,13 +199,22 @@  void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
 
 	if (batch->batch_used == 0)
 		return;
+	
+	if (batch_idx == RENDER_BATCH) {
+		/* Mark the end of the batchbuffer. */
+		OUT_BATCH(MI_BATCH_BUFFER_END);
+		/* Emit a padding dword if we aren't going to be quad-word aligned. */
+		if (batch->batch_used & 1)
+			OUT_BATCH(MI_NOOP);
+	} else {
+		/* Mark the end of the batchbuffer. */
+		OUT_BATCH_BLT(MI_BATCH_BUFFER_END);
+		/* Emit a padding dword if we aren't going to be quad-word aligned. */
+		if (batch->batch_used & 1)
+			OUT_BATCH_BLT(MI_NOOP);
+	}
 
-	/* Mark the end of the batchbuffer. */
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-	/* Emit a padding dword if we aren't going to be quad-word aligned. */
-	if (batch->batch_used & 1)
-		OUT_BATCH(MI_NOOP);
-
+	
 	if (DUMP_BATCHBUFFERS) {
 	    FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
 	    if (file) {
@@ -211,9 +226,13 @@  void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
 	ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr);
 	if (ret == 0) {
 		if (batch_idx == RENDER_BATCH) {
-			ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
-					NULL, 0, 0xffffffff);
+			 ret = drm_intel_bo_mrb_exec(batch->batch_bo, 
+					 batch->batch_used*4,
+					 NULL, 0, 0xffffffff, I915_EXEC_RENDER);
 		} else {
+			 ret = drm_intel_bo_mrb_exec(batch->batch_bo, 
+					 batch->batch_used*4,
+					 NULL, 0, 0xffffffff, I915_EXEC_BLIT);
 		}
 	}
 	if (ret != 0) {
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 1ed3ad8..6d1ee15 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -156,20 +156,35 @@  intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
 }
 
 #define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define ALIGN_BATCH_BLT(align) intel_batch_align(intel, align, BLT_BATCH);
+/* The *_BLT macros mirror their render counterparts but pass BLT_BATCH. */
 #define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
+#define OUT_BATCH_BLT(dword) intel_batch_emit_dword(intel, dword, BLT_BATCH)
 
 #define OUT_RELOC(bo, read_domains, write_domains, delta) \
 	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH)
 
+#define OUT_RELOC_BLT(bo, read_domains, write_domains, delta) \
+	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,BLT_BATCH)
+
 #define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
 	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH)
 
+#define OUT_RELOC_FENCED_BLT(bo, read_domains, write_domains, delta) \
+	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,BLT_BATCH)
+
 #define OUT_RELOC_PIXMAP(pixmap, reads, write, delta)	\
 	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH)
 
+#define OUT_RELOC_PIXMAP_BLT(pixmap, reads, write, delta)	\
+	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, BLT_BATCH)
+
 #define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta)	\
 	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH)
 
+#define OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, reads, write, delta)	\
+	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, BLT_BATCH)
+
 union intfloat {
 	float f;
 	unsigned int ui;
@@ -181,20 +196,23 @@  union intfloat {
 	OUT_BATCH(tmp.ui);			\
 } while(0)
 
-#define BEGIN_BATCH(n)							\
+#define __BEGIN_BATCH(n,batch_idx) /* open an n-dword emission */	\
 do {									\
-	struct batch *batch = &intel->batch[RENDER_BATCH];		\
+	struct batch *batch = &intel->batch[batch_idx];			\
 	if (batch->batch_emitting != 0)					\
 		FatalError("%s: BEGIN_BATCH called without closing "	\
 			   "ADVANCE_BATCH\n", __FUNCTION__);		\
 	assert(!batch->in_batch_atomic);				\
-	intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH);	\
+	intel_batch_require_space(scrn, intel, (n) * 4, batch_idx);	\
 	batch->batch_emitting = (n);					\
 	batch->batch_emit_start = batch->batch_used;			\
 } while (0)
 
+#define BEGIN_BATCH(n)		__BEGIN_BATCH(n,RENDER_BATCH)	/* render batch */
+#define BEGIN_BATCH_BLT(n)	__BEGIN_BATCH(n,BLT_BATCH)	/* BLT batch */
+
+#define __ADVANCE_BATCH(batch_idx) do {					\
+	struct batch *batch = &intel->batch[batch_idx];			\
 	if (batch->batch_emitting == 0)					\
 		FatalError("%s: ADVANCE_BATCH called with no matching "	\
 			   "BEGIN_BATCH\n", __FUNCTION__);		\
@@ -213,6 +231,9 @@  do {									\
 	batch->batch_emitting = 0;					\
 } while (0)
 
+#define ADVANCE_BATCH()		__ADVANCE_BATCH(RENDER_BATCH)	/* ends BEGIN_BATCH */
+#define ADVANCE_BATCH_BLT()	__ADVANCE_BATCH(BLT_BATCH)	/* ends BEGIN_BATCH_BLT */
+
 void intel_next_vertex(intel_screen_private *intel);
 static inline void intel_vertex_emit(intel_screen_private *intel, float v)
 {
diff --git a/src/intel_driver.c b/src/intel_driver.c
index b9fb69d..051497d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -581,8 +581,6 @@  static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 	}
 
 	intel->use_shadow = FALSE;
-	if (IS_GEN6(intel))
-		intel->use_shadow = TRUE;
 
 	if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
 		intel->use_shadow =
@@ -809,6 +807,7 @@  intel_flush_callback(CallbackListPtr *list,
 		intel_batch_submit(scrn,
 				   intel->batch[RENDER_BATCH].need_mi_flush 
 				   ||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
+
 	}
 }
 
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 05ac3d2..cbd87ca 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -208,17 +208,9 @@  intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
 }
 
 static Bool
-i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
+generic_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
 {
 	ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum];
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	if (IS_GEN6(intel)) {
-		intel_debug_fallback(scrn,
-				     "Sandybridge BLT engine not supported\n");
-		return FALSE;
-	}
-
 	if (!UXA_PM_IS_SOLID(drawable, planemask)) {
 		intel_debug_fallback(scrn, "planemask is not solid\n");
 		return FALSE;
@@ -232,7 +224,6 @@  i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
 	default:
 		return FALSE;
 	}
-
 	return TRUE;
 }
 
@@ -240,7 +231,7 @@  i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
  * Sets up hardware state for a series of solid fills.
  */
 static Bool
-i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
+generic_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -252,10 +243,17 @@  i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
 	if (!intel_check_pitch_2d(pixmap))
 		return FALSE;
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
-				RENDER_BATCH))
-		return FALSE;
-
+	if (IS_GEN6(intel)) {
+		if (!intel_get_aperture_space(scrn, bo_table, 
+					ARRAY_SIZE(bo_table), 
+					BLT_BATCH))
+			return FALSE;
+	} else {
+		if (!intel_get_aperture_space(scrn, bo_table, 
+					ARRAY_SIZE(bo_table), 
+					RENDER_BATCH))
+			return FALSE;
+	}
 	intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
 	switch (pixmap->drawable.bitsPerPixel) {
 	case 8:
@@ -274,6 +272,52 @@  i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
 	return TRUE;
 }
 
+/*
+ * Emit one XY_COLOR_BLT fill of [x1,x2) x [y1,y2) into the BLT batch,
+ * clipped to the pixmap bounds; empty rectangles are dropped.
+ */
+static void gen6_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
+{
+	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	unsigned long pitch;
+	uint32_t cmd;
+
+	if (x1 < 0)
+		x1 = 0;
+	if (y1 < 0)
+		y1 = 0;
+	if (x2 > pixmap->drawable.width)
+		x2 = pixmap->drawable.width;
+	if (y2 > pixmap->drawable.height)
+		y2 = pixmap->drawable.height;
+
+	if (x2 <= x1 || y2 <= y1)
+		return;
+
+	pitch = intel_pixmap_pitch(pixmap);
+	cmd = XY_COLOR_BLT_CMD;
+	if (pixmap->drawable.bitsPerPixel == 32)
+		cmd |= XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+
+	if (intel_pixmap_tiled(pixmap)) {
+		assert((pitch % 512) == 0);
+		pitch >>= 2;
+		cmd |= XY_COLOR_BLT_TILED;
+	}
+
+	BEGIN_BATCH_BLT(6);
+	OUT_BATCH_BLT(cmd);
+	OUT_BATCH_BLT(intel->BR[13] | pitch);
+	OUT_BATCH_BLT((y1 << 16) | (x1 & 0xffff));
+	OUT_BATCH_BLT((y2 << 16) | (x2 & 0xffff));
+	/* The fill writes the pixmap, so declare a write domain too. */
+	OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, I915_GEM_DOMAIN_RENDER,
+				I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH_BLT(intel->BR[16]);
+	ADVANCE_BATCH_BLT();
+}
+
 static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
@@ -324,10 +368,15 @@  static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
 	ironlake_blt_workaround(scrn);
 }
 
+static void gen6_uxa_done_solid(PixmapPtr pixmap)
+{
+	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+	intel_batch_submit(scrn, FALSE, BLT_BATCH);	/* flush = FALSE */
+}
+
 static void i830_uxa_done_solid(PixmapPtr pixmap)
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
-
 	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
@@ -336,17 +385,10 @@  static void i830_uxa_done_solid(PixmapPtr pixmap)
  *   - support planemask using FULL_BLT_CMD?
  */
 static Bool
-i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
+generic_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
 		    int alu, Pixel planemask)
 {
 	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	if (IS_GEN6(intel)) {
-		intel_debug_fallback(scrn,
-				     "Sandybridge BLT engine not supported\n");
-		return FALSE;
-	}
 
 	if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) {
 		intel_debug_fallback(scrn, "planemask is not solid");
@@ -375,7 +417,7 @@  i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
 }
 
 static Bool
-i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
+generic_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
 		      int ydir, int alu, Pixel planemask)
 {
 	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
@@ -386,9 +428,18 @@  i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
 		intel_get_pixmap_bo(dest),
 	};
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
-		RENDER_BATCH))
-		return FALSE;
+
+	if (IS_GEN6(intel)) {
+		if (!intel_get_aperture_space(scrn, bo_table, 
+					ARRAY_SIZE(bo_table), 
+					BLT_BATCH))
+			return FALSE;
+	} else {
+		if (!intel_get_aperture_space(scrn, bo_table, 
+					ARRAY_SIZE(bo_table), 
+					RENDER_BATCH))
+			return FALSE;
+	}
 
 	intel->render_source = source;
 
@@ -408,6 +459,90 @@  i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
 }
 
 static void
+gen6_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
+	      int dst_y1, int w, int h)
+{
+	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	uint32_t cmd;
+	int dst_x2, dst_y2, src_x2, src_y2;
+	unsigned int dst_pitch, src_pitch;
+	/* Emits one XY_SRC_COPY_BLT, render_source -> dest, on the BLT batch. */
+	dst_x2 = dst_x1 + w;	/* exclusive right edge */
+	dst_y2 = dst_y1 + h;	/* exclusive bottom edge */
+
+	/* XXX Fixup extents as a lamentable workaround for missing
+	 * source clipping in the upper layers.
+	 */
+	if (dst_x1 < 0)
+		src_x1 -= dst_x1, dst_x1 = 0;
+	if (dst_y1 < 0)
+		src_y1 -= dst_y1, dst_y1 = 0;
+	if (dst_x2 > dest->drawable.width)
+		dst_x2 = dest->drawable.width;
+	if (dst_y2 > dest->drawable.height)
+		dst_y2 = dest->drawable.height;
+	/* The source box matches the size of the clipped destination box. */
+	src_x2 = src_x1 + (dst_x2 - dst_x1);
+	src_y2 = src_y1 + (dst_y2 - dst_y1);
+	/* Clip to the source pixmap too, moving the destination to match. */
+	if (src_x1 < 0)
+		dst_x1 -= src_x1, src_x1 = 0;
+	if (src_y1 < 0)
+		dst_y1 -= src_y1, src_y1 = 0;
+	if (src_x2 > intel->render_source->drawable.width)
+		dst_x2 -= src_x2 - intel->render_source->drawable.width;
+	if (src_y2 > intel->render_source->drawable.height)
+		dst_y2 -= src_y2 - intel->render_source->drawable.height;
+	/* Skip the blit when clipping left nothing to copy. */
+	if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1)
+		return;
+
+	dst_pitch = intel_pixmap_pitch(dest);
+	src_pitch = intel_pixmap_pitch(intel->render_source);
+	{
+		BEGIN_BATCH_BLT(8);
+
+		cmd = XY_SRC_COPY_BLT_CMD;
+
+		if (dest->drawable.bitsPerPixel == 32)
+			cmd |=
+			    XY_SRC_COPY_BLT_WRITE_ALPHA |
+			    XY_SRC_COPY_BLT_WRITE_RGB;
+		/* Tiled pixmaps get the TILED flag and a pitch divided by 4. */
+		if (INTEL_INFO(intel)->gen >= 40) {
+			if (intel_pixmap_tiled(dest)) {
+				assert((dst_pitch % 512) == 0);
+				dst_pitch >>= 2;
+				cmd |= XY_SRC_COPY_BLT_DST_TILED;
+			}
+
+			if (intel_pixmap_tiled(intel->render_source)) {
+				assert((src_pitch % 512) == 0);
+				src_pitch >>= 2;
+				cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+			}
+		}
+
+		OUT_BATCH_BLT(cmd);
+
+		OUT_BATCH_BLT(intel->BR[13] | dst_pitch);
+		OUT_BATCH_BLT((dst_y1 << 16) | (dst_x1 & 0xffff));
+		OUT_BATCH_BLT((dst_y2 << 16) | (dst_x2 & 0xffff));
+		OUT_RELOC_PIXMAP_FENCED_BLT(dest,
+					I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER,
+					0);
+		OUT_BATCH_BLT((src_y1 << 16) | (src_x1 & 0xffff));
+		OUT_BATCH_BLT(src_pitch);
+		OUT_RELOC_PIXMAP_FENCED_BLT(intel->render_source,
+					I915_GEM_DOMAIN_RENDER, 0,
+					0);
+		ADVANCE_BATCH_BLT();
+	}
+}
+
+static void
 i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
 	      int dst_y1, int w, int h)
 {
@@ -497,10 +632,16 @@  i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
 static void i830_uxa_done_copy(PixmapPtr dest)
 {
 	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
 
 	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
+static void gen6_uxa_done_copy(PixmapPtr dest)
+{
+	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+	intel_batch_submit(scrn, FALSE, BLT_BATCH);	/* flush = FALSE */
+}
+
 /**
  * Do any cleanup from the Composite operation.
  *
@@ -1191,17 +1332,30 @@  Bool intel_uxa_init(ScreenPtr screen)
 	intel->vertex_bo = NULL;
 
 	/* Solid fill */
-	intel->uxa_driver->check_solid = i830_uxa_check_solid;
-	intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid;
-	intel->uxa_driver->solid = i830_uxa_solid;
-	intel->uxa_driver->done_solid = i830_uxa_done_solid;
+	if (IS_GEN6(intel)) {
+		intel->uxa_driver->check_solid = generic_uxa_check_solid;
+		intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+		intel->uxa_driver->solid = gen6_uxa_solid;
+		intel->uxa_driver->done_solid = gen6_uxa_done_solid;
+	} else {
+		intel->uxa_driver->check_solid = generic_uxa_check_solid;
+		intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+		intel->uxa_driver->solid = i830_uxa_solid;
+		intel->uxa_driver->done_solid = i830_uxa_done_solid;
+	}
 
 	/* Copy */
-	intel->uxa_driver->check_copy = i830_uxa_check_copy;
-	intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy;
-	intel->uxa_driver->copy = i830_uxa_copy;
-	intel->uxa_driver->done_copy = i830_uxa_done_copy;
-
+	if (IS_GEN6(intel)) {
+		intel->uxa_driver->check_copy = generic_uxa_check_copy;
+		intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+		intel->uxa_driver->copy = gen6_uxa_copy;
+		intel->uxa_driver->done_copy = gen6_uxa_done_copy;
+	} else {
+		intel->uxa_driver->check_copy = generic_uxa_check_copy;
+		intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+		intel->uxa_driver->copy = i830_uxa_copy;
+		intel->uxa_driver->done_copy = i830_uxa_done_copy;
+	}
 	/* Composite */
 	if (IS_GEN2(intel)) {
 		intel->uxa_driver->check_composite = i830_check_composite;