[1/2] split render engine batch buffer and BLT engine

Message ID 1288078396-6002-1-git-send-email-nanhai.zou@intel.com (mailing list archive)
State New, archived

Commit Message

Zou, Nanhai Oct. 26, 2010, 7:33 a.m. UTC
Split batchbuffer state by ring, in preparation for driving the Gen6 BLT engine separately from the render engine.

All per-batch state (the batch map and usage counters, the current and last batch BOs, the atomic-section bookkeeping, the batch/flush/in-flight pixmap lists and need_mi_flush) moves out of intel_screen_private into a new struct batch, of which two are kept, indexed by RENDER_BATCH and BLT_BATCH. The batchbuffer entry points (intel_batch_start_atomic/end_atomic, intel_batch_submit, intel_batch_emit_flush/do_flush, intel_get_aperture_space, the reloc helpers and the batch_flush_notify callbacks) gain a batch_idx argument naming the ring; all existing callers pass RENDER_BATCH, and the OUT_BATCH/BEGIN_BATCH/ADVANCE_BATCH macros remain hardwired to the render batch. The BLT batch is only initialized and torn down on Gen6, and its flush and exec paths are left as empty stubs here.

Also initialize is_affine_src in i915_emit_composite_primitive.
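As an illustration of the new calling convention, a minimal hypothetical caller might look like the sketch below; the size and dword values are placeholders, but the functions and the RENDER_BATCH index are exactly the ones introduced in the hunks that follow:

	/* Hypothetical caller: reserve 4 dwords atomically on the render ring. */
	intel_batch_start_atomic(scrn, 4, RENDER_BATCH);
	OUT_BATCH(MI_NOOP);	/* OUT_BATCH() et al. stay hardwired to RENDER_BATCH */
	intel_batch_end_atomic(scrn, RENDER_BATCH);

	/* Submit without an extra MI_FLUSH; for BLT_BATCH the exec path is still a stub. */
	intel_batch_submit(scrn, FALSE, RENDER_BATCH);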

Patch

diff --git a/src/i830_3d.c b/src/i830_3d.c
index 1043201..e0cfda7 100644
--- a/src/i830_3d.c
+++ b/src/i830_3d.c
@@ -38,7 +38,7 @@  void I830EmitInvarientState(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 
 	OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
 	OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
diff --git a/src/i830_render.c b/src/i830_render.c
index 52646d3..867883a 100644
--- a/src/i830_render.c
+++ b/src/i830_render.c
@@ -302,7 +302,7 @@  static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
 
 	format = i8xx_get_card_format(intel, picture);
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 
 	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
 		  LOAD_TEXTURE_MAP(unit) | 4);
@@ -488,7 +488,8 @@  i830_prepare_composite(int op, PicturePtr source_picture,
 	if (!i830_get_dest_format(dest_picture, &intel->render_dest_format))
 		return FALSE;
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
+				RENDER_BATCH))
 		return FALSE;
 
 	if (mask) {
@@ -565,7 +566,7 @@  i830_prepare_composite(int op, PicturePtr source_picture,
 
 	if(intel_pixmap_is_dirty(source) ||
 	   (mask && intel_pixmap_is_dirty(mask)))
-		intel_batch_emit_flush(scrn);
+		intel_batch_emit_flush(scrn, RENDER_BATCH);
 
 	intel->needs_render_state_emit = TRUE;
 
@@ -583,7 +584,7 @@  static void i830_emit_composite_state(ScrnInfoPtr scrn)
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_RENDER;
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 
 	if (intel_pixmap_tiled(intel->render_dest)) {
 		tiling_bits = BUF_3D_TILED_SURFACE;
@@ -848,7 +849,8 @@  i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	intel_batch_start_atomic(scrn, 58 +	/* invarient */
 				 22 +	/* setup */
 				 20 +	/* 2 * setup_texture */
-				 1 + 30 /* verts */ );
+				 1 + 30 /* verts */,
+				 RENDER_BATCH);
 
 	if (intel->needs_render_state_emit)
 		i830_emit_composite_state(scrn);
@@ -856,10 +858,10 @@  i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	i830_emit_composite_primitive(dest, srcX, srcY, maskX, maskY, dstX,
 				      dstY, w, h);
 
-	intel_batch_end_atomic(scrn);
+	intel_batch_end_atomic(scrn, RENDER_BATCH);
 }
 
-void i830_batch_flush_notify(ScrnInfoPtr scrn)
+void i830_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
diff --git a/src/i915_3d.c b/src/i915_3d.c
index 77db568..a4e386d 100644
--- a/src/i915_3d.c
+++ b/src/i915_3d.c
@@ -38,7 +38,7 @@  void I915EmitInvarientState(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 
 	OUT_BATCH(_3DSTATE_AA_CMD |
 		  AA_LINE_ECAAR_WIDTH_ENABLE |
diff --git a/src/i915_3d.h b/src/i915_3d.h
index 04531f3..056c961 100644
--- a/src/i915_3d.h
+++ b/src/i915_3d.h
@@ -608,12 +608,12 @@  enum i915_fs_channel {
 
 #define FS_BEGIN()							\
 	do {									\
-		_shader_offset = intel->batch_used++;				\
+		_shader_offset = intel->batch[RENDER_BATCH].batch_used++;				\
 	} while (0)
 
 #define FS_END()							\
 	do {									\
-		intel->batch_ptr[_shader_offset] =					\
+		intel->batch[RENDER_BATCH].batch_ptr[_shader_offset] =					\
 		_3DSTATE_PIXEL_SHADER_PROGRAM |					\
-		(intel->batch_used - _shader_offset - 2);			\
+		(intel->batch[RENDER_BATCH].batch_used - _shader_offset - 2);			\
 	} while (0);
diff --git a/src/i915_render.c b/src/i915_render.c
index fafdac5..21a0021 100644
--- a/src/i915_render.c
+++ b/src/i915_render.c
@@ -546,7 +546,7 @@  i915_emit_composite_primitive(PixmapPtr dest,
 {
 	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-	Bool is_affine_src, is_affine_mask = TRUE;
+	Bool is_affine_src = FALSE, is_affine_mask = TRUE;
 	int per_vertex, num_floats;
 	int tex_unit = 0;
 	int src_unit = -1, mask_unit = -1;
@@ -780,7 +780,8 @@  i915_prepare_composite(int op, PicturePtr source_picture,
 				  &intel->i915_render_state.dst_format))
 		return FALSE;
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
+		RENDER_BATCH))
 		return FALSE;
 
 	intel->needs_render_ca_pass = FALSE;
@@ -838,10 +839,10 @@  i915_prepare_composite(int op, PicturePtr source_picture,
 
 	/* BUF_INFO is an implicit flush */
 	if (dest != intel->render_current_dest)
-		intel_batch_do_flush(scrn);
+		intel_batch_do_flush(scrn, RENDER_BATCH);
 	else if((source && intel_pixmap_is_dirty(source)) ||
 		(mask && intel_pixmap_is_dirty(mask)))
-		intel_batch_emit_flush(scrn);
+		intel_batch_emit_flush(scrn, RENDER_BATCH);
 
 	intel->needs_render_state_emit = TRUE;
 
@@ -1007,7 +1008,7 @@  static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 	tex_count += ! is_solid_src;
 	tex_count += mask && ! is_solid_mask;
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 
 	if (tex_count != 0) {
 	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
@@ -1043,7 +1044,7 @@  static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 	if (1 || dest != intel->render_current_dest) {
 		uint32_t tiling_bits;
 
-		intel_batch_do_flush(scrn);
+		intel_batch_do_flush(scrn, RENDER_BATCH);
 
 		if (intel_pixmap_tiled(dest)) {
 			tiling_bits = BUF_3D_TILED_SURFACE;
@@ -1116,7 +1117,7 @@  i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
 	/* 28 + 16 + 10 + 20 + 32 + 16 */
-	intel_batch_start_atomic(scrn, 150);
+	intel_batch_start_atomic(scrn, 150, RENDER_BATCH);
 
 	if (intel->needs_render_state_emit)
 		i915_emit_composite_setup(scrn);
@@ -1158,7 +1159,7 @@  i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 			i915_composite_emit_shader(intel, PictOpOutReverse);
 		}
 
-		intel->prim_offset = intel->batch_used;
+		intel->prim_offset = intel->batch[RENDER_BATCH].batch_used;
 		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
 		OUT_BATCH(intel->vertex_index);
 	}
@@ -1170,7 +1171,7 @@  i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 			 dstX, dstY,
 			 w, h);
 
-	intel_batch_end_atomic(scrn);
+	intel_batch_end_atomic(scrn, RENDER_BATCH);
 }
 
 void
@@ -1179,7 +1180,7 @@  i915_vertex_flush(intel_screen_private *intel)
 	if (intel->prim_offset == 0)
 		return;
 
-	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
+	intel->batch[RENDER_BATCH].batch_ptr[intel->prim_offset] |= intel->vertex_count;
 	intel->prim_offset = 0;
 
 	if (intel->needs_render_ca_pass) {
@@ -1197,7 +1198,7 @@  i915_vertex_flush(intel_screen_private *intel)
 }
 
 void
-i915_batch_flush_notify(ScrnInfoPtr scrn)
+i915_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
diff --git a/src/i915_video.c b/src/i915_video.c
index 861bea6..87016a7 100644
--- a/src/i915_video.c
+++ b/src/i915_video.c
@@ -92,7 +92,7 @@  I915DisplayVideoTextured(ScrnInfoPtr scrn,
 
 #define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
 #define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
-#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)
+#define BATCH_BYTES(p)		((p)->batch[RENDER_BATCH].batch_bo->size - 16)
 
 	while (nbox_total) {
 		nbox_this_time = nbox_total;
@@ -100,7 +100,8 @@  I915DisplayVideoTextured(ScrnInfoPtr scrn,
 			nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
 		nbox_total -= nbox_this_time;
 
-		intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);
+		intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time,
+			RENDER_BATCH);
 
 		IntelEmitInvarientState(scrn);
 		intel->last_3d = LAST_3D_VIDEO;
@@ -445,7 +446,7 @@  I915DisplayVideoTextured(ScrnInfoPtr scrn,
 			OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
 		}
 
-		intel_batch_end_atomic(scrn);
+		intel_batch_end_atomic(scrn, RENDER_BATCH);
 	}
 
 	if (target != pixmap) {
@@ -478,5 +479,5 @@  I915DisplayVideoTextured(ScrnInfoPtr scrn,
 		target->drawable.pScreen->DestroyPixmap(target);
 	}
 
-	intel_debug_flush(scrn);
+	intel_debug_flush(scrn, RENDER_BATCH);
 }
diff --git a/src/i965_render.c b/src/i965_render.c
index c0c5de4..c9c65fb 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1128,7 +1128,7 @@  i965_set_picture_surface_state(intel_screen_private *intel,
 			read_domains = I915_GEM_DOMAIN_SAMPLER;
 		}
 
-		intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
+		intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain, RENDER_BATCH);
 		dri_bo_emit_reloc(ss_bo, read_domains, write_domain,
 				  0,
 				  ss_index * sizeof(*ss) +
@@ -1169,7 +1169,8 @@  static void i965_emit_composite_state(ScrnInfoPtr scrn)
 	intel_batch_mark_pixmap_domains(intel,
 					intel_get_pixmap_private(dest),
 					I915_GEM_DOMAIN_RENDER,
-					I915_GEM_DOMAIN_RENDER);
+					I915_GEM_DOMAIN_RENDER,
+					RENDER_BATCH);
 
 	urb_vs_start = 0;
 	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
@@ -1195,7 +1196,7 @@  static void i965_emit_composite_state(ScrnInfoPtr scrn)
 	 */
 	ALIGN_BATCH(64);
 
-	assert(intel->in_batch_atomic);
+	assert(intel->batch[RENDER_BATCH].in_batch_atomic);
 	{
 		/* Match Mesa driver setup */
 		OUT_BATCH(MI_FLUSH |
@@ -1473,7 +1474,7 @@  static Bool i965_composite_check_aperture(ScrnInfoPtr scrn)
 	struct gen4_render_state *render_state = intel->gen4_render_state;
 	gen4_composite_op *composite_op = &render_state->composite_op;
 	drm_intel_bo *bo_table[] = {
-		intel->batch_bo,
+		intel->batch[RENDER_BATCH].batch_bo,
 		composite_op->binding_table_bo,
 		render_state->vertex_buffer_bo,
 		render_state->vs_state_bo,
@@ -1558,7 +1559,7 @@  i965_prepare_composite(int op, PicturePtr source_picture,
 	/* Flush any pending writes prior to relocating the textures. */
 	if (intel_pixmap_is_dirty(source) ||
 	    (mask && intel_pixmap_is_dirty(mask)))
-		intel_batch_emit_flush(scrn);
+		intel_batch_emit_flush(scrn, RENDER_BATCH);
 
 
 	/* Set up the surface states. */
@@ -1684,7 +1685,7 @@  i965_prepare_composite(int op, PicturePtr source_picture,
 	}
 
 	if (!i965_composite_check_aperture(scrn)) {
-		intel_batch_submit(scrn, FALSE);
+		intel_batch_submit(scrn, FALSE, RENDER_BATCH);
 		if (!i965_composite_check_aperture(scrn)) {
 			intel_debug_fallback(scrn,
 					     "Couldn't fit render operation "
@@ -1857,9 +1858,9 @@  i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb);
 
 	if (!i965_composite_check_aperture(scrn))
-		intel_batch_submit(scrn, FALSE);
+		intel_batch_submit(scrn, FALSE, RENDER_BATCH);
 
-	intel_batch_start_atomic(scrn, 200);
+	intel_batch_start_atomic(scrn, 200, RENDER_BATCH);
 	if (intel->needs_render_state_emit)
 		i965_emit_composite_state(scrn);
 
@@ -1891,10 +1892,10 @@  i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	render_state->vb_offset += i;
 	drm_intel_bo_unreference(vb_bo);
 
-	intel_batch_end_atomic(scrn);
+	intel_batch_end_atomic(scrn, RENDER_BATCH);
 }
 
-void i965_batch_flush_notify(ScrnInfoPtr scrn)
+void i965_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	struct gen4_render_state *render_state = intel->gen4_render_state;
diff --git a/src/i965_video.c b/src/i965_video.c
index 4ededde..f3e4082 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1154,7 +1154,7 @@  I965DisplayVideoTextured(ScrnInfoPtr scrn,
 		float *vb;
 		drm_intel_bo *bo_table[] = {
 			NULL,	/* vb_bo */
-			intel->batch_bo,
+			intel->batch[RENDER_BATCH].batch_bo,
 			bind_bo,
 			intel->video.gen4_sampler_bo,
 			intel->video.gen4_sip_kernel_bo,
@@ -1199,10 +1199,10 @@  I965DisplayVideoTextured(ScrnInfoPtr scrn,
 		if (drm_intel_bufmgr_check_aperture_space(bo_table,
 							  ARRAY_SIZE(bo_table))
 		    < 0) {
-			intel_batch_submit(scrn, FALSE);
+			intel_batch_submit(scrn, FALSE, RENDER_BATCH);
 		}
 
-		intel_batch_start_atomic(scrn, 100);
+		intel_batch_start_atomic(scrn, 100, RENDER_BATCH);
 
 		i965_emit_video_setup(scrn, bind_bo, n_src_surf);
 
@@ -1228,7 +1228,7 @@  I965DisplayVideoTextured(ScrnInfoPtr scrn,
 		OUT_BATCH(0);	/* index buffer offset, ignored */
 		OUT_BATCH(MI_NOOP);
 
-		intel_batch_end_atomic(scrn);
+		intel_batch_end_atomic(scrn, RENDER_BATCH);
 
 		drm_intel_bo_unreference(vb_bo);
 
@@ -1240,7 +1240,7 @@  I965DisplayVideoTextured(ScrnInfoPtr scrn,
 	/* release reference once we're finished */
 	drm_intel_bo_unreference(bind_bo);
 
-	intel_debug_flush(scrn);
+	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
 void i965_free_video(ScrnInfoPtr scrn)
diff --git a/src/intel.h b/src/intel.h
index 7604eee..c5bdbd5 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -223,7 +223,7 @@  static inline Bool intel_pixmap_tiled(PixmapPtr pixmap)
 }
 
 dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap);
-void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo);
+void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo, int batch_idx);
 
 typedef struct _I830OutputRec I830OutputRec, *I830OutputPtr;
 
@@ -284,6 +284,26 @@  enum dri_type {
 	DRI_DRI2
 };
 
+struct batch {
+	uint32_t batch_ptr[4096];
+	/** Byte offset in batch_ptr for the next dword to be emitted. */
+	unsigned int batch_used;
+	/** Position in batch_ptr at the start of the current BEGIN_BATCH */
+	unsigned int batch_emit_start;
+	/** Number of bytes to be emitted in the current BEGIN_BATCH. */
+	uint32_t batch_emitting;
+	dri_bo *batch_bo;
+	dri_bo *last_batch_bo;
+	/** Whether we're in a section of code that can't tolerate flushing */
+	Bool in_batch_atomic;
+	/** Ending batch_used that was verified by intel_start_batch_atomic() */
+	int batch_atomic_limit;
+	struct list batch_pixmaps;
+	struct list flush_pixmaps;
+	struct list in_flight;
+	Bool need_mi_flush;
+};
+
 typedef struct intel_screen_private {
 	ScrnInfoPtr scrn;
 	unsigned char *MMIOBase;
@@ -304,23 +324,9 @@  typedef struct intel_screen_private {
 
 	dri_bufmgr *bufmgr;
 
-	uint32_t batch_ptr[4096];
-	/** Byte offset in batch_ptr for the next dword to be emitted. */
-	unsigned int batch_used;
-	/** Position in batch_ptr at the start of the current BEGIN_BATCH */
-	unsigned int batch_emit_start;
-	/** Number of bytes to be emitted in the current BEGIN_BATCH. */
-	uint32_t batch_emitting;
-	dri_bo *batch_bo;
-	dri_bo *last_batch_bo;
-	/** Whether we're in a section of code that can't tolerate flushing */
-	Bool in_batch_atomic;
-	/** Ending batch_used that was verified by intel_start_batch_atomic() */
-	int batch_atomic_limit;
-	struct list batch_pixmaps;
-	struct list flush_pixmaps;
-	struct list in_flight;
-
+#define RENDER_BATCH	0
+#define BLT_BATCH	1
+	struct batch batch[2];
 	/* For Xvideo */
 	Bool use_overlay;
 #ifdef INTEL_XVMC
@@ -332,7 +338,6 @@  typedef struct intel_screen_private {
 
 	Bool shadow_present;
 
-	Bool need_mi_flush;
 
 	Bool tiling;
 	Bool swapbuffers_wait;
@@ -348,7 +353,7 @@  typedef struct intel_screen_private {
 	CloseScreenProcPtr CloseScreen;
 
 	void (*vertex_flush) (struct intel_screen_private *intel);
-	void (*batch_flush_notify) (ScrnInfoPtr scrn);
+	void (*batch_flush_notify) (ScrnInfoPtr scrn, int batch_idx);
 
 	uxa_driver_t *uxa_driver;
 	Bool need_sync;
@@ -554,8 +559,8 @@  Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
 void i915_composite(PixmapPtr dest, int srcX, int srcY,
 		    int maskX, int maskY, int dstX, int dstY, int w, int h);
 void i915_vertex_flush(intel_screen_private *intel);
-void i915_batch_flush_notify(ScrnInfoPtr scrn);
-void i830_batch_flush_notify(ScrnInfoPtr scrn);
+void i915_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
+void i830_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
 /* i965_render.c */
 unsigned int gen4_render_state_size(ScrnInfoPtr scrn);
 void gen4_render_state_init(ScrnInfoPtr scrn);
@@ -570,7 +575,7 @@  Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
 void i965_composite(PixmapPtr dest, int srcX, int srcY,
 		    int maskX, int maskY, int dstX, int dstY, int w, int h);
 
-void i965_batch_flush_notify(ScrnInfoPtr scrn);
+void i965_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
 
 Bool intel_transform_is_affine(PictTransformPtr t);
 Bool
@@ -658,7 +663,7 @@  static inline drm_intel_bo *intel_bo_alloc_for_data(ScrnInfoPtr scrn,
 #define ALLOW_SHARING			0x00000010
 #define DISABLE_REUSE			0x00000020
 
-void intel_debug_flush(ScrnInfoPtr scrn);
+void intel_debug_flush(ScrnInfoPtr scrn, int batch_idx);
 
 static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable)
 {
@@ -683,7 +688,7 @@  Bool intel_uxa_init(ScreenPtr pScreen);
 void intel_uxa_create_screen_resources(ScreenPtr pScreen);
 void intel_uxa_block_handler(intel_screen_private *intel);
 Bool intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
-			      int num_bos);
+			      int num_bos, int batch_idx);
 
 /* intel_shadow.c */
 void intel_shadow_blt(intel_screen_private *intel);
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index e7ca69d..3b72ba1 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -61,19 +61,22 @@  void intel_next_vertex(intel_screen_private *intel)
 	intel->vertex_used = 0;
 }
 
-static void intel_next_batch(ScrnInfoPtr scrn)
+static void intel_next_batch(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct batch *batch = &intel->batch[batch_idx];
 
+	batch->batch_emit_start = 0;
+	batch->batch_emitting = 0;
 	/* The 865 has issues with larger-than-page-sized batch buffers. */
 	if (IS_I865G(intel))
-		intel->batch_bo =
+		batch->batch_bo =
 		    dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096);
 	else
-		intel->batch_bo =
+		batch->batch_bo =
 		    dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096);
 
-	intel->batch_used = 0;
+	batch->batch_used = 0;
 
 	/* We don't know when another client has executed, so we have
 	 * to reinitialize our 3D state per batch.
@@ -85,41 +88,36 @@  void intel_batch_init(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	intel->batch_emit_start = 0;
-	intel->batch_emitting = 0;
-
-	intel_next_batch(scrn);
+	intel_next_batch(scrn, RENDER_BATCH);
+	if (IS_GEN6(intel))
+		intel_next_batch(scrn, BLT_BATCH);
 }
 
-void intel_batch_teardown(ScrnInfoPtr scrn)
+static void batch_teardown(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct batch *batch = &intel->batch[batch_idx];
 
-	if (intel->batch_bo != NULL) {
-		dri_bo_unreference(intel->batch_bo);
-		intel->batch_bo = NULL;
-	}
-
-	if (intel->last_batch_bo != NULL) {
-		dri_bo_unreference(intel->last_batch_bo);
-		intel->last_batch_bo = NULL;
+	if (batch->batch_bo != NULL) {
+		dri_bo_unreference(batch->batch_bo);
+		batch->batch_bo = NULL;
 	}
 
-	if (intel->vertex_bo) {
-		dri_bo_unreference(intel->vertex_bo);
-		intel->vertex_bo = NULL;
+	if (batch->last_batch_bo != NULL) {
+		dri_bo_unreference(batch->last_batch_bo);
+		batch->last_batch_bo = NULL;
 	}
 
-	while (!list_is_empty(&intel->batch_pixmaps))
-		list_del(intel->batch_pixmaps.next);
+	while (!list_is_empty(&batch->batch_pixmaps))
+		list_del(batch->batch_pixmaps.next);
 
-	while (!list_is_empty(&intel->flush_pixmaps))
-		list_del(intel->flush_pixmaps.next);
+	while (!list_is_empty(&batch->flush_pixmaps))
+		list_del(batch->flush_pixmaps.next);
 
-	while (!list_is_empty(&intel->in_flight)) {
+	while (!list_is_empty(&batch->in_flight)) {
 		struct intel_pixmap *entry;
 
-		entry = list_first_entry(&intel->in_flight,
+		entry = list_first_entry(&batch->in_flight,
 					 struct intel_pixmap,
 					 in_flight);
 
@@ -129,70 +127,95 @@  void intel_batch_teardown(ScrnInfoPtr scrn)
 	}
 }
 
-void intel_batch_do_flush(ScrnInfoPtr scrn)
+void intel_batch_teardown(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	while (!list_is_empty(&intel->flush_pixmaps))
-		list_del(intel->flush_pixmaps.next);
+	if (intel->vertex_bo) {
+		dri_bo_unreference(intel->vertex_bo);
+		intel->vertex_bo = NULL;
+	}
 
-	intel->need_mi_flush = FALSE;
+	batch_teardown(scrn, RENDER_BATCH);
+	if (IS_GEN6(intel))
+		batch_teardown(scrn, BLT_BATCH);
 }
 
-void intel_batch_emit_flush(ScrnInfoPtr scrn)
+void intel_batch_do_flush(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-	int flags;
+	struct batch *batch = &intel->batch[batch_idx];
 
-	assert (!intel->in_batch_atomic);
+	while (!list_is_empty(&batch->flush_pixmaps))
+		list_del(batch->flush_pixmaps.next);
 
-	/* Big hammer, look to the pipelined flushes in future. */
-	flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
-	if (INTEL_INFO(intel)->gen >= 40)
-		flags = 0;
+	batch->need_mi_flush = FALSE;
+}
 
-	BEGIN_BATCH(1);
-	OUT_BATCH(MI_FLUSH | flags);
-	ADVANCE_BATCH();
+void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct batch *batch = &intel->batch[batch_idx];
+	int flags;
 
-	intel_batch_do_flush(scrn);
+	assert (!batch->in_batch_atomic);
+	if (batch_idx == RENDER_BATCH) {
+		/* Big hammer, look to the pipelined flushes in future. */
+		flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+		if (INTEL_INFO(intel)->gen >= 40)
+			flags = 0;
+
+		BEGIN_BATCH(1);
+		OUT_BATCH(MI_FLUSH | flags);
+		ADVANCE_BATCH();
+
+		intel_batch_do_flush(scrn, batch_idx);
+	} else {
+	}
 }
 
-void intel_batch_submit(ScrnInfoPtr scrn, int flush)
+void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	int ret;
+	struct batch *batch = &intel->batch[batch_idx];
 
-	assert (!intel->in_batch_atomic);
+	assert (!batch->in_batch_atomic);
 
-	if (intel->vertex_flush)
-		intel->vertex_flush(intel);
-	intel_end_vertex(intel);
+	if (batch_idx == RENDER_BATCH) {
+		if (intel->vertex_flush)
+			intel->vertex_flush(intel);
+		intel_end_vertex(intel);
+	}
 
 	if (flush)
-		intel_batch_emit_flush(scrn);
+		intel_batch_emit_flush(scrn, batch_idx);
 
-	if (intel->batch_used == 0)
+	if (batch->batch_used == 0)
 		return;
 
 	/* Mark the end of the batchbuffer. */
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	/* Emit a padding dword if we aren't going to be quad-word aligned. */
-	if (intel->batch_used & 1)
+	if (batch->batch_used & 1)
 		OUT_BATCH(MI_NOOP);
 
 	if (DUMP_BATCHBUFFERS) {
 	    FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
 	    if (file) {
-		fwrite (intel->batch_ptr, intel->batch_used*4, 1, file);
+		fwrite (batch->batch_ptr, batch->batch_used*4, 1, file);
 		fclose(file);
 	    }
 	}
 
-	ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
-	if (ret == 0)
-		ret = dri_bo_exec(intel->batch_bo, intel->batch_used*4,
-				  NULL, 0, 0xffffffff);
+	ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr);
+	if (ret == 0) {
+		if (batch_idx == RENDER_BATCH) {
+			ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
+					NULL, 0, 0xffffffff);
+		} else {
+		}
+	}
 	if (ret != 0) {
 		if (ret == -EIO) {
 			static int once;
@@ -212,10 +235,10 @@  void intel_batch_submit(ScrnInfoPtr scrn, int flush)
 		}
 	}
 
-	while (!list_is_empty(&intel->batch_pixmaps)) {
+	while (!list_is_empty(&batch->batch_pixmaps)) {
 		struct intel_pixmap *entry;
 
-		entry = list_first_entry(&intel->batch_pixmaps,
+		entry = list_first_entry(&batch->batch_pixmaps,
 					 struct intel_pixmap,
 					 batch);
 
@@ -224,14 +247,14 @@  void intel_batch_submit(ScrnInfoPtr scrn, int flush)
 		list_del(&entry->batch);
 	}
 
-	intel->need_mi_flush |= !list_is_empty(&intel->flush_pixmaps);
-	while (!list_is_empty(&intel->flush_pixmaps))
-		list_del(intel->flush_pixmaps.next);
+	batch->need_mi_flush |= !list_is_empty(&batch->flush_pixmaps);
+	while (!list_is_empty(&batch->flush_pixmaps))
+		list_del(batch->flush_pixmaps.next);
 
-	while (!list_is_empty(&intel->in_flight)) {
+	while (!list_is_empty(&batch->in_flight)) {
 		struct intel_pixmap *entry;
 
-		entry = list_first_entry(&intel->in_flight,
+		entry = list_first_entry(&batch->in_flight,
 					 struct intel_pixmap,
 					 in_flight);
 
@@ -243,38 +266,40 @@  void intel_batch_submit(ScrnInfoPtr scrn, int flush)
 	/* Save a ref to the last batch emitted, which we use for syncing
 	 * in debug code.
 	 */
-	dri_bo_unreference(intel->last_batch_bo);
-	intel->last_batch_bo = intel->batch_bo;
-	intel->batch_bo = NULL;
+	dri_bo_unreference(batch->last_batch_bo);
+	batch->last_batch_bo = batch->batch_bo;
+	batch->batch_bo = NULL;
 
-	intel_next_batch(scrn);
+	intel_next_batch(scrn, batch_idx);
 
 	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
-		intel_batch_wait_last(scrn);
+		intel_batch_wait_last(scrn, batch_idx);
 
 	if (intel->batch_flush_notify)
-		intel->batch_flush_notify(scrn);
+		intel->batch_flush_notify(scrn, batch_idx);
 }
 
 /** Waits on the last emitted batchbuffer to be completed. */
-void intel_batch_wait_last(ScrnInfoPtr scrn)
+void intel_batch_wait_last(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct batch *batch = &intel->batch[batch_idx];
 
 	/* Map it CPU write, which guarantees it's done.  This is a completely
 	 * non performance path, so we don't need anything better.
 	 */
-	drm_intel_gem_bo_map_gtt(intel->last_batch_bo);
-	drm_intel_gem_bo_unmap_gtt(intel->last_batch_bo);
+	drm_intel_gem_bo_map_gtt(batch->last_batch_bo);
+	drm_intel_gem_bo_unmap_gtt(batch->last_batch_bo);
 }
 
-void intel_debug_flush(ScrnInfoPtr scrn)
+void intel_debug_flush(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
 	if (intel->debug_flush & DEBUG_FLUSH_CACHES)
-		intel_batch_emit_flush(scrn);
+		intel_batch_emit_flush(scrn, batch_idx);
 
 	if (intel->debug_flush & DEBUG_FLUSH_BATCHES)
-		intel_batch_submit(scrn, FALSE);
+		intel_batch_submit(scrn, FALSE, batch_idx);
 }
+
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index bf7a5d9..1ed3ad8 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -35,14 +35,15 @@  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 void intel_batch_init(ScrnInfoPtr scrn);
 void intel_batch_teardown(ScrnInfoPtr scrn);
-void intel_batch_emit_flush(ScrnInfoPtr scrn);
-void intel_batch_do_flush(ScrnInfoPtr scrn);
-void intel_batch_submit(ScrnInfoPtr scrn, int flush);
-void intel_batch_wait_last(ScrnInfoPtr scrn);
+void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx);
+void intel_batch_do_flush(ScrnInfoPtr scrn, int batch_idx);
+void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx);
+void intel_batch_wait_last(ScrnInfoPtr scrn, int batch_idx);
 
-static inline int intel_batch_space(intel_screen_private *intel)
+static inline int intel_batch_space(intel_screen_private *intel, int batch_idx)
 {
-	return (intel->batch_bo->size - BATCH_RESERVED) - (4*intel->batch_used);
+	return (intel->batch[batch_idx].batch_bo->size - BATCH_RESERVED) - 
+		(4*intel->batch[batch_idx].batch_used);
 }
 
 static inline int intel_vertex_space(intel_screen_private *intel)
@@ -51,49 +52,50 @@  static inline int intel_vertex_space(intel_screen_private *intel)
 }
 
 static inline void
-intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, unsigned int sz)
+intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, unsigned int sz, int batch_idx)
 {
-	assert(sz < intel->batch_bo->size - 8);
-	if (intel_batch_space(intel) < sz)
-		intel_batch_submit(scrn, FALSE);
+	assert(sz < intel->batch[batch_idx].batch_bo->size - 8);
+	if (intel_batch_space(intel, batch_idx) < sz)
+		intel_batch_submit(scrn, FALSE, batch_idx);
 }
 
-static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz)
+static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	assert(!intel->in_batch_atomic);
-	intel_batch_require_space(scrn, intel, sz * 4);
+	assert(!intel->batch[batch_idx].in_batch_atomic);
+	intel_batch_require_space(scrn, intel, sz * 4, batch_idx);
 
-	intel->in_batch_atomic = TRUE;
-	intel->batch_atomic_limit = intel->batch_used + sz;
+	intel->batch[batch_idx].in_batch_atomic = TRUE;
+	intel->batch[batch_idx].batch_atomic_limit = intel->batch[batch_idx].batch_used + sz;
 }
 
-static inline void intel_batch_end_atomic(ScrnInfoPtr scrn)
+static inline void intel_batch_end_atomic(ScrnInfoPtr scrn, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	assert(intel->in_batch_atomic);
-	assert(intel->batch_used <= intel->batch_atomic_limit);
-	intel->in_batch_atomic = FALSE;
+	struct batch *batch = &intel->batch[batch_idx];
+	assert(batch->in_batch_atomic);
+	assert(batch->batch_used <= batch->batch_atomic_limit);
+	batch->in_batch_atomic = FALSE;
 }
 
-static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword)
+static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword, int batch_idx)
 {
-	intel->batch_ptr[intel->batch_used++] = dword;
+	struct batch *batch = &intel->batch[batch_idx];
+	batch->batch_ptr[batch->batch_used++] = dword;
 }
 
-static inline void intel_batch_align(intel_screen_private *intel, uint32_t align)
+static inline void intel_batch_align(intel_screen_private *intel, uint32_t align, int batch_idx)
 {
 	uint32_t delta;
-
+	struct batch *batch = &intel->batch[batch_idx];
 	align /= 4;
 	assert(align);
 
-	if ((delta = intel->batch_used & (align - 1))) {
+	if ((delta = batch->batch_used & (align - 1))) {
 		delta = align - delta;
-		memset (intel->batch_ptr + intel->batch_used, 0, 4*delta);
-		intel->batch_used += delta;
+		memset (batch->batch_ptr + batch->batch_used, 0, 4*delta);
+		batch->batch_used += delta;
 	}
 }
 
@@ -101,33 +103,38 @@  static inline void
 intel_batch_emit_reloc(intel_screen_private *intel,
 		       dri_bo * bo,
 		       uint32_t read_domains,
-		       uint32_t write_domains, uint32_t delta, int needs_fence)
+		       uint32_t write_domains, uint32_t delta, int needs_fence,
+		       int batch_idx)
 {
+
+	struct batch *batch = &intel->batch[batch_idx];
 	if (needs_fence)
-		drm_intel_bo_emit_reloc_fence(intel->batch_bo,
-					      intel->batch_used * 4,
+		drm_intel_bo_emit_reloc_fence(batch->batch_bo,
+					      batch->batch_used * 4,
 					      bo, delta,
 					      read_domains, write_domains);
 	else
-		drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used * 4,
+		drm_intel_bo_emit_reloc(batch->batch_bo, batch->batch_used * 4,
 					bo, delta,
 					read_domains, write_domains);
 
-	intel_batch_emit_dword(intel, bo->offset + delta);
+	intel_batch_emit_dword(intel, bo->offset + delta, batch_idx);
 }
 
 static inline void
 intel_batch_mark_pixmap_domains(intel_screen_private *intel,
 				struct intel_pixmap *priv,
-				uint32_t read_domains, uint32_t write_domain)
+				uint32_t read_domains, uint32_t write_domain,
+				int batch_idx)
 {
+	struct batch *batch = &intel->batch[batch_idx];
 	assert (read_domains);
 	assert (write_domain == 0 || write_domain == read_domains);
 
 	if (list_is_empty(&priv->batch))
-		list_add(&priv->batch, &intel->batch_pixmaps);
+		list_add(&priv->batch, &batch->batch_pixmaps);
 	if (write_domain && list_is_empty(&priv->flush))
-		list_add(&priv->flush, &intel->flush_pixmaps);
+		list_add(&priv->flush, &batch->flush_pixmaps);
 
 	priv->batch_write |= write_domain != 0;
 	priv->busy = 1;
@@ -136,31 +143,32 @@  intel_batch_mark_pixmap_domains(intel_screen_private *intel,
 static inline void
 intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
 			      uint32_t read_domains, uint32_t write_domain,
-			      uint32_t delta, int needs_fence)
+			      uint32_t delta, int needs_fence, int batch_idx)
 {
 	struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
 
-	intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
+	intel_batch_mark_pixmap_domains(intel, priv, read_domains,
+			write_domain, batch_idx);
 
 	intel_batch_emit_reloc(intel, priv->bo,
 			       read_domains, write_domain,
-			       delta, needs_fence);
+			       delta, needs_fence, batch_idx);
 }
 
-#define ALIGN_BATCH(align) intel_batch_align(intel, align);
-#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword)
+#define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
 
 #define OUT_RELOC(bo, read_domains, write_domains, delta) \
-	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0)
+	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH)
 
 #define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
-	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1)
+	intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH)
 
 #define OUT_RELOC_PIXMAP(pixmap, reads, write, delta)	\
-	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0)
+	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH)
 
 #define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta)	\
-	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1)
+	intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH)
 
 union intfloat {
 	float f;
@@ -175,32 +183,34 @@  union intfloat {
 
 #define BEGIN_BATCH(n)							\
 do {									\
-	if (intel->batch_emitting != 0)					\
+	struct batch *batch = &intel->batch[RENDER_BATCH];		\
+	if (batch->batch_emitting != 0)					\
 		FatalError("%s: BEGIN_BATCH called without closing "	\
 			   "ADVANCE_BATCH\n", __FUNCTION__);		\
-	assert(!intel->in_batch_atomic);				\
-	intel_batch_require_space(scrn, intel, (n) * 4);		\
-	intel->batch_emitting = (n);					\
-	intel->batch_emit_start = intel->batch_used;			\
+	assert(!batch->in_batch_atomic);				\
+	intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH);	\
+	batch->batch_emitting = (n);					\
+	batch->batch_emit_start = batch->batch_used;			\
 } while (0)
 
 #define ADVANCE_BATCH() do {						\
-	if (intel->batch_emitting == 0)					\
+	struct batch *batch = &intel->batch[RENDER_BATCH];		\
+	if (batch->batch_emitting == 0)					\
 		FatalError("%s: ADVANCE_BATCH called with no matching "	\
 			   "BEGIN_BATCH\n", __FUNCTION__);		\
-	if (intel->batch_used >						\
-	    intel->batch_emit_start + intel->batch_emitting)		\
+	if (batch->batch_used >						\
+	    batch->batch_emit_start + batch->batch_emitting)		\
 		FatalError("%s: ADVANCE_BATCH: exceeded allocation %d/%d\n ", \
 			   __FUNCTION__,				\
-			   intel->batch_used - intel->batch_emit_start,	\
-			   intel->batch_emitting);			\
-	if (intel->batch_used < intel->batch_emit_start +		\
-	    intel->batch_emitting)					\
+			   batch->batch_used - batch->batch_emit_start,	\
+			   batch->batch_emitting);			\
+	if (batch->batch_used < batch->batch_emit_start +		\
+	    batch->batch_emitting)					\
 		FatalError("%s: ADVANCE_BATCH: under-used allocation %d/%d\n ", \
 			   __FUNCTION__,				\
-			   intel->batch_used - intel->batch_emit_start,	\
-			   intel->batch_emitting);			\
-	intel->batch_emitting = 0;					\
+			   batch->batch_used - batch->batch_emit_start,	\
+			   batch->batch_emitting);			\
+	batch->batch_emitting = 0;					\
 } while (0)
 
 void intel_next_vertex(intel_screen_private *intel);
diff --git a/src/intel_display.c b/src/intel_display.c
index d32224e..78f83aa 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -552,7 +552,7 @@  intel_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height)
 		return NULL;
 	}
 
-	intel_set_pixmap_bo(rotate_pixmap, intel_crtc->rotate_bo);
+	intel_set_pixmap_bo(rotate_pixmap, intel_crtc->rotate_bo, RENDER_BATCH);
 
 	intel->shadow_present = TRUE;
 
@@ -568,7 +568,7 @@  intel_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data)
 	struct intel_mode *mode = intel_crtc->mode;
 
 	if (rotate_pixmap) {
-		intel_set_pixmap_bo(rotate_pixmap, NULL);
+		intel_set_pixmap_bo(rotate_pixmap, NULL, RENDER_BATCH);
 		FreeScratchPixmapHeader(rotate_pixmap);
 	}
 
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 67f7be9..b4876c8 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -93,7 +93,8 @@  static PixmapPtr get_front_buffer(DrawablePtr drawable)
 						   intel->front_pitch,
 						   intel->front_buffer->virtual);
 
-			intel_set_pixmap_bo(pixmap, intel->front_buffer);
+			intel_set_pixmap_bo(pixmap, intel->front_buffer, 
+				RENDER_BATCH);
 			intel_get_pixmap_private(pixmap)->offscreen = 0;
 			if (WindowDrawable(drawable->type))
 				screen->SetWindowPixmap((WindowPtr)drawable,
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 7e4a4a4..b9fb69d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -431,9 +431,15 @@  static int intel_init_bufmgr(intel_screen_private *intel)
 	drm_intel_bufmgr_gem_enable_reuse(intel->bufmgr);
 	drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr);
 
-	list_init(&intel->batch_pixmaps);
-	list_init(&intel->flush_pixmaps);
-	list_init(&intel->in_flight);
+	list_init(&intel->batch[RENDER_BATCH].batch_pixmaps);
+	list_init(&intel->batch[RENDER_BATCH].flush_pixmaps);
+	list_init(&intel->batch[RENDER_BATCH].in_flight);
+
+	if (IS_GEN6(intel)) {
+		list_init(&intel->batch[BLT_BATCH].batch_pixmaps);
+		list_init(&intel->batch[BLT_BATCH].flush_pixmaps);
+		list_init(&intel->batch[BLT_BATCH].in_flight);
+	}
 
 	return TRUE;
 }
@@ -801,8 +807,8 @@  intel_flush_callback(CallbackListPtr *list,
 		 * framebuffer until significantly later.
 		 */
 		intel_batch_submit(scrn,
-				   intel->need_mi_flush ||
-				   !list_is_empty(&intel->flush_pixmaps));
+				   intel->batch[RENDER_BATCH].need_mi_flush 
+				   ||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
 	}
 }
 
@@ -1220,7 +1226,7 @@  static Bool I830CloseScreen(int scrnIndex, ScreenPtr screen)
 	if (intel->front_buffer) {
 		if (!intel->use_shadow)
 			intel_set_pixmap_bo(screen->GetScreenPixmap(screen),
-					    NULL);
+					    NULL, RENDER_BATCH);
 		intel_mode_remove_fb(intel);
 		drm_intel_bo_unreference(intel->front_buffer);
 		intel->front_buffer = NULL;
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 14c47a0..05ac3d2 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -106,21 +106,21 @@  ironlake_blt_workaround(ScrnInfoPtr scrn)
 
 Bool
 intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
-			 int num_bos)
+			 int num_bos, int batch_idx)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct batch *batch = &intel->batch[batch_idx];
 
-	if (intel->batch_bo == NULL) {
+	if (batch->batch_bo == NULL) {
 		intel_debug_fallback(scrn, "VT inactive\n");
 		return FALSE;
 	}
 
-	bo_table[0] = intel->batch_bo;
+	bo_table[0] = batch->batch_bo;
 	if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) {
-		intel_batch_submit(scrn, FALSE);
-		bo_table[0] = intel->batch_bo;
-		if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) !=
-		    0) {
+		intel_batch_submit(scrn, FALSE, batch_idx);
+		bo_table[0] = batch->batch_bo;
+		if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) {
 			intel_debug_fallback(scrn, "Couldn't get aperture "
 					    "space for BOs\n");
 			return FALSE;
@@ -252,7 +252,8 @@  i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
 	if (!intel_check_pitch_2d(pixmap))
 		return FALSE;
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
+				RENDER_BATCH))
 		return FALSE;
 
 	intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
@@ -327,7 +328,7 @@  static void i830_uxa_done_solid(PixmapPtr pixmap)
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
 
-	intel_debug_flush(scrn);
+	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
 /**
@@ -385,7 +386,8 @@  i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
 		intel_get_pixmap_bo(dest),
 	};
 
-	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
+		RENDER_BATCH))
 		return FALSE;
 
 	intel->render_source = source;
@@ -496,7 +498,7 @@  static void i830_uxa_done_copy(PixmapPtr dest)
 {
 	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
 
-	intel_debug_flush(scrn);
+	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
 /**
@@ -512,7 +514,7 @@  static void i830_done_composite(PixmapPtr dest)
 	if (intel->vertex_flush)
 		intel->vertex_flush(intel);
 
-	intel_debug_flush(scrn);
+	intel_debug_flush(scrn, RENDER_BATCH);
 }
 
 #define xFixedToFloat(val) \
@@ -609,7 +611,7 @@  dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap)
 	return intel->bo;
 }
 
-void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
+void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo, int batch_idx)
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -631,7 +633,7 @@  void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
 			list_del(&priv->batch);
 			list_del(&priv->flush);
 		} else {
-			list_add(&priv->in_flight, &intel->in_flight);
+			list_add(&priv->in_flight, &intel->batch[batch_idx].in_flight);
 			priv = NULL;
 		}
 
@@ -695,7 +697,7 @@  static Bool intel_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
 
 	if (!list_is_empty(&priv->batch) &&
 	    (access == UXA_ACCESS_RW || priv->batch_write))
-		intel_batch_submit(scrn, FALSE);
+		intel_batch_submit(scrn, FALSE, RENDER_BATCH);
 
 	if (priv->tiling || bo->size <= intel->max_gtt_map_size)
 		ret = drm_intel_gem_bo_map_gtt(bo);
@@ -788,7 +790,7 @@  static Bool intel_uxa_put_image(PixmapPtr pixmap,
 						   w, h,
 						   0, 0,
 						   stride, NULL);
-			intel_set_pixmap_bo(pixmap, bo);
+			intel_set_pixmap_bo(pixmap, bo, RENDER_BATCH);
 			dri_bo_unreference(bo);
 
 			return intel_uxa_pixmap_put_image(pixmap, src, src_pitch, 0, 0, w, h);
@@ -909,7 +911,7 @@  static Bool intel_uxa_get_image(PixmapPtr pixmap,
 
 		FreeScratchGC(gc);
 
-		intel_batch_submit(xf86Screens[screen->myNum], FALSE);
+		intel_batch_submit(xf86Screens[screen->myNum], FALSE, RENDER_BATCH);
 
 		x = y = 0;
 		pixmap = scratch;
@@ -932,7 +934,7 @@  void intel_uxa_block_handler(intel_screen_private *intel)
 		 * and beyond rendering results may not hit the
 		 * framebuffer until significantly later.
 		 */
-		intel_batch_submit(intel->scrn, TRUE);
+		intel_batch_submit(intel->scrn, TRUE, RENDER_BATCH);
 
 		DamageEmpty(intel->shadow_damage);
 	}
@@ -965,6 +967,8 @@  intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 		unsigned int size, tiling;
 		int stride;
 
+		struct batch *batch = &intel->batch[RENDER_BATCH];
+
 		/* Always attempt to tile, compute_size() will remove the
 		 * tiling for pixmaps that are either too large or too small
 		 * to be effectively tiled.
@@ -1008,7 +1012,7 @@  intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 				aligned_h = ALIGN(h, 2);
 
 			list_foreach_entry(priv, struct intel_pixmap,
-					   &intel->in_flight,
+					   &batch->in_flight,
 					   in_flight) {
 				if (priv->tiling != tiling)
 					continue;
@@ -1074,7 +1078,7 @@  intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 static Bool intel_uxa_destroy_pixmap(PixmapPtr pixmap)
 {
 	if (pixmap->refcnt == 1)
-		intel_set_pixmap_bo(pixmap, NULL);
+		intel_set_pixmap_bo(pixmap, NULL, RENDER_BATCH);
 	fbDestroyPixmap(pixmap);
 	return TRUE;
 }
@@ -1091,7 +1095,7 @@  void intel_uxa_create_screen_resources(ScreenPtr screen)
 		intel_shadow_create(intel);
 	} else {
 		PixmapPtr pixmap = screen->GetScreenPixmap(screen);
-		intel_set_pixmap_bo(pixmap, bo);
+		intel_set_pixmap_bo(pixmap, bo, RENDER_BATCH);
 		intel_get_pixmap_private(pixmap)->busy = 1;
 		screen->ModifyPixmapHeader(pixmap,
 					   scrn->virtualX,