diff mbox

[PATH,v2,4/6] Xv: setup pipeline for Xv on Sandybridge

Message ID 1288145853-25252-5-git-send-email-haihao.xiang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiang, Haihao Oct. 27, 2010, 2:17 a.m. UTC
None
diff mbox

Patch

diff --git a/src/brw_structs.h b/src/brw_structs.h
index 1cee5bd..d089ba1 100644
--- a/src/brw_structs.h
+++ b/src/brw_structs.h
@@ -1487,4 +1487,104 @@  struct brw_interface_descriptor {
     } desc3;
 };
 
+struct gen6_blend_state
+{
+	struct {
+		unsigned int dest_blend_factor:5;
+		unsigned int source_blend_factor:5;
+		unsigned int pad3:1;
+		unsigned int blend_func:3;
+		unsigned int pad2:1;
+		unsigned int ia_dest_blend_factor:5;
+		unsigned int ia_source_blend_factor:5;
+		unsigned int pad1:1;
+		unsigned int ia_blend_func:3;
+		unsigned int pad0:1;
+		unsigned int ia_blend_enable:1;
+		unsigned int blend_enable:1;
+	} blend0;
+
+	struct {
+		unsigned int post_blend_clamp_enable:1;
+		unsigned int pre_blend_clamp_enable:1;
+		unsigned int clamp_range:2;
+		unsigned int pad0:4;
+		unsigned int x_dither_offset:2;
+		unsigned int y_dither_offset:2;
+		unsigned int dither_enable:1;
+		unsigned int alpha_test_func:3;
+		unsigned int alpha_test_enable:1;
+		unsigned int pad1:1;
+		unsigned int logic_op_func:4;
+		unsigned int logic_op_enable:1;
+		unsigned int pad2:1;
+		unsigned int write_disable_b:1;
+		unsigned int write_disable_g:1;
+		unsigned int write_disable_r:1;
+		unsigned int write_disable_a:1;
+		unsigned int pad3:1;
+		unsigned int alpha_to_coverage_dither:1;
+		unsigned int alpha_to_one:1;
+		unsigned int alpha_to_coverage:1;
+	} blend1;
+};
+
+struct gen6_color_calc_state
+{
+	struct {
+		unsigned int alpha_test_format:1;
+		unsigned int pad0:14;
+		unsigned int round_disable:1;
+		unsigned int bf_stencil_ref:8;
+		unsigned int stencil_ref:8;
+	} cc0;
+
+	union {
+		float alpha_ref_f;
+		struct {
+			unsigned int ui:8;
+			unsigned int pad0:24;
+		} alpha_ref_fi;
+	} cc1;
+
+	float constant_r;
+	float constant_g;
+	float constant_b;
+	float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+	struct {
+		unsigned int pad0:3;
+		unsigned int bf_stencil_pass_depth_pass_op:3;
+		unsigned int bf_stencil_pass_depth_fail_op:3;
+		unsigned int bf_stencil_fail_op:3;
+		unsigned int bf_stencil_func:3;
+		unsigned int bf_stencil_enable:1;
+		unsigned int pad1:2;
+		unsigned int stencil_write_enable:1;
+		unsigned int stencil_pass_depth_pass_op:3;
+		unsigned int stencil_pass_depth_fail_op:3;
+		unsigned int stencil_fail_op:3;
+		unsigned int stencil_func:3;
+		unsigned int stencil_enable:1;
+	} ds0;
+
+	struct {
+		unsigned int bf_stencil_write_mask:8;
+		unsigned int bf_stencil_test_mask:8;
+		unsigned int stencil_write_mask:8;
+		unsigned int stencil_test_mask:8;
+	} ds1;
+
+	struct {
+		unsigned int pad0:26;
+		unsigned int depth_write_enable:1;
+		unsigned int depth_test_func:3;
+		unsigned int pad1:1;
+		unsigned int depth_test_enable:1;
+	} ds2;
+};
+
 #endif
diff --git a/src/i965_reg.h b/src/i965_reg.h
index fe419dc..3953dab 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -22,6 +22,10 @@ 
 
 #define BRW_3DSTATE_PIPELINED_POINTERS		BRW_3D(3, 0, 0)
 #define BRW_3DSTATE_BINDING_TABLE_POINTERS	BRW_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS  (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS  (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS  (1 << 8) /* for GEN6 */
+
 #define BRW_3DSTATE_VERTEX_BUFFERS		BRW_3D(3, 0, 8)
 #define BRW_3DSTATE_VERTEX_ELEMENTS		BRW_3D(3, 0, 9)
 #define BRW_3DSTATE_INDEX_BUFFER		BRW_3D(3, 0, 0xa)
@@ -32,6 +36,9 @@ 
 #define BRW_3DSTATE_SAMPLER_PALETTE_LOAD	BRW_3D(3, 1, 2)
 #define BRW_3DSTATE_CHROMA_KEY			BRW_3D(3, 1, 4)
 #define BRW_3DSTATE_DEPTH_BUFFER		BRW_3D(3, 1, 5)
+# define BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT	29
+# define BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT	18
+
 #define BRW_3DSTATE_POLY_STIPPLE_OFFSET		BRW_3D(3, 1, 6)
 #define BRW_3DSTATE_POLY_STIPPLE_PATTERN	BRW_3D(3, 1, 7)
 #define BRW_3DSTATE_LINE_STIPPLE		BRW_3D(3, 1, 8)
@@ -44,6 +51,91 @@ 
 
 #define BRW_3DPRIMITIVE				BRW_3D(3, 3, 0)
 
+#define BRW_3DSTATE_CLEAR_PARAMS		BRW_3D(3, 1, 0x10)
+/* DW1 */
+# define BRW_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	BRW_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
+
+#define GEN6_3DSTATE_URB			BRW_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	BRW_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS		BRW_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS				BRW_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS				BRW_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+
+#define GEN6_3DSTATE_CLIP			BRW_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF				BRW_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
+
+
+#define GEN6_3DSTATE_WM				BRW_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF			27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS		BRW_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS          	BRW_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS          	BRW_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK		BRW_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE		BRW_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1			(0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
+
 #define PIPELINE_SELECT_3D		0
 #define PIPELINE_SELECT_MEDIA		1
 
@@ -80,16 +172,22 @@ 
 #define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
 #define BRW_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
 #define BRW_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
+#define BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
 
 /* VERTEX_BUFFER_STATE Structure */
 #define VB0_BUFFER_INDEX_SHIFT		27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT	26
 #define VB0_VERTEXDATA			(0 << 26)
 #define VB0_INSTANCEDATA		(1 << 26)
+#define GEN6_VB0_VERTEXDATA		(0 << 20)
+#define GEN6_VB0_INSTANCEDATA		(1 << 20)
 #define VB0_BUFFER_PITCH_SHIFT		0
 
 /* VERTEX_ELEMENT_STATE Structure */
 #define VE0_VERTEX_BUFFER_INDEX_SHIFT	27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT	26 /* for GEN6 */
 #define VE0_VALID			(1 << 26)
+#define GEN6_VE0_VALID			(1 << 25) /* for GEN6 */
 #define VE0_FORMAT_SHIFT		16
 #define VE0_OFFSET_SHIFT		0
 #define VE1_VFCOMPONENT_0_SHIFT		28
diff --git a/src/i965_video.c b/src/i965_video.c
index aaf10fa..235dfb9 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -134,6 +134,21 @@  static const uint32_t ps_kernel_planar_static_gen5[][4] = {
 #include "exa_wm_write.g4b.gen5"
 };
 
+/* programs for Sandybridge */
+static const uint32_t ps_kernel_packed_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_planar_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
 static uint32_t float_to_uint(float f)
 {
 	union {
@@ -1256,4 +1271,616 @@  void i965_free_video(ScrnInfoPtr scrn)
 	intel->video.gen4_sampler_bo = NULL;
 	drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo);
 	intel->video.gen4_sip_kernel_bo = NULL;
+	drm_intel_bo_unreference(intel->video.wm_prog_packed_bo);
+	intel->video.wm_prog_packed_bo = NULL;
+	drm_intel_bo_unreference(intel->video.wm_prog_planar_bo);
+	intel->video.wm_prog_planar_bo = NULL;
+	drm_intel_bo_unreference(intel->video.gen6_blend_bo);
+	intel->video.gen6_blend_bo = NULL;
+	drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo);
+	intel->video.gen6_depth_stencil_bo = NULL;
+}
+
+/* for GEN6+ */
+static drm_intel_bo *
+gen6_create_cc_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_color_calc_state *cc_state;
+	drm_intel_bo *cc_bo;
+
+	if (intel_alloc_and_map(
+			intel,
+			"textured video cc state", 
+			sizeof(*cc_state), 
+			&cc_bo,
+			&cc_state) != 0)
+		return NULL;
+
+	cc_state->constant_r = 1.0;
+	cc_state->constant_g = 0.0;
+	cc_state->constant_b = 1.0;
+	cc_state->constant_a = 1.0;
+
+	drm_intel_bo_unmap(cc_bo);
+	return cc_bo;
+}
+
+static drm_intel_bo *
+gen6_create_blend_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_blend_state *blend_state;
+	drm_intel_bo *blend_bo;
+
+	if (intel_alloc_and_map(
+			intel,
+			"textured video blend state",
+			sizeof(*blend_state),
+			&blend_bo,
+			&blend_state) != 0)
+		return NULL;
+
+	blend_state->blend1.logic_op_enable = 1;
+	blend_state->blend1.logic_op_func = 0xc;
+
+	drm_intel_bo_unmap(blend_bo);
+	return blend_bo;
+}
+
+static drm_intel_bo *
+gen6_create_depth_stencil_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_depth_stencil_state *depth_stencil_state;
+	drm_intel_bo *depth_stencil_bo;
+
+	if (intel_alloc_and_map(
+			intel,
+			"textured video blend state",
+			sizeof(*depth_stencil_state),
+			&depth_stencil_bo,
+			&depth_stencil_state) != 0)
+		return NULL;
+
+	drm_intel_bo_unmap(depth_stencil_bo);
+	return depth_stencil_bo;
+}
+
+static Bool
+gen6_create_vidoe_objects(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	if (intel->video.gen4_sampler_bo == NULL)
+		intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn);
+		
+	if (intel->video.wm_prog_packed_bo == NULL)
+		intel->video.wm_prog_packed_bo =
+			i965_create_program(scrn,
+					&ps_kernel_packed_static_gen6[0][0],
+					sizeof(ps_kernel_packed_static_gen6));
+		
+	if (intel->video.wm_prog_planar_bo == NULL)
+		intel->video.wm_prog_planar_bo =
+			i965_create_program(scrn,
+					&ps_kernel_planar_static_gen6[0][0],
+					sizeof(ps_kernel_planar_static_gen6));
+
+	if (intel->video.gen4_cc_vp_bo == NULL)
+		intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn);
+
+	if (intel->video.gen4_cc_bo == NULL)
+		intel->video.gen4_cc_bo = gen6_create_cc_state(scrn);
+
+	if (intel->video.gen6_blend_bo == NULL)
+		intel->video.gen6_blend_bo = gen6_create_blend_state(scrn);
+
+	if (intel->video.gen6_depth_stencil_bo == NULL)
+		intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn);
+
+
+	return (intel->video.gen4_sampler_bo != NULL &&
+		intel->video.wm_prog_packed_bo != NULL &&
+		intel->video.wm_prog_planar_bo != NULL &&
+		intel->video.gen4_cc_vp_bo != NULL &&
+		intel->video.gen4_cc_bo != NULL &&
+		intel->video.gen6_blend_bo != NULL &&
+		intel->video.gen6_depth_stencil_bo != NULL);
+}
+
+static void
+gen6_upload_invarient_states(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+		BRW_PIPE_CONTROL_WC_FLUSH |
+		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		BRW_PIPE_CONTROL_NOWRITE);
+	OUT_BATCH(0); /* write address */
+	OUT_BATCH(0); /* write data */
+
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+	OUT_BATCH(1);
+
+	/* Set system instruction pointer */
+	OUT_BATCH(BRW_STATE_SIP | 0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
+	OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+
+static void
+gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+		(4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_RELOC(intel->video.gen4_cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void
+gen6_upload_urb(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_upload_cc_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+	OUT_RELOC(intel->video.gen6_blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	OUT_RELOC(intel->video.gen6_depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+}
+
+static void
+gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	OUT_RELOC(intel->video.gen4_sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void
+gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* Binding table pointers */
+	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
+		GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0);		/* vs */
+	OUT_BATCH(0);		/* gs */
+	/* Only the PS uses the binding table */
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+static void
+gen6_upload_depth_buffer_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+		(BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+static void
+gen6_upload_drawing_rectangle(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
+	OUT_BATCH(0x00000000);	/* ymin, xmin */
+	OUT_BATCH((scrn->virtualX - 1) | (scrn->virtualY - 1) << 16);	/* ymax, xmax */
+	OUT_BATCH(0x00000000);	/* yorigin, xorigin */
+}
+
+static void 
+gen6_upload_vs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	OUT_BATCH(0); /* without VS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void 
+gen6_upload_gs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void 
+gen6_upload_clip_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+	OUT_BATCH(0);
+}
+
+static void 
+gen6_upload_sf_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW14 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW19 */
+}
+
+static void 
+gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* disable WM constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
+	if (is_packed) {
+		OUT_RELOC(intel->video.wm_prog_packed_bo, 
+			I915_GEM_DOMAIN_INSTRUCTION, 0,
+			0);
+		OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+			(2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	} else {
+		OUT_RELOC(intel->video.wm_prog_planar_bo,
+			I915_GEM_DOMAIN_INSTRUCTION, 0,
+			0);
+		OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+			(7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	}
+	OUT_BATCH(0);
+	OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+	OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+		GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+		GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+	OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+		GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_upload_vertex_element_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* Set up our vertex elements, sourced from the single vertex buffer. */
+	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2));
+	/* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		GEN6_VE0_VALID |
+		(BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		(0 << VE0_OFFSET_SHIFT));
+	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+	/* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		GEN6_VE0_VALID |
+		(BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		(8 << VE0_OFFSET_SHIFT));
+	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
+		(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+}
+
+static void
+gen6_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo, int n_src_surf)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	assert(n_src_surf == 1 || n_src_surf == 6);
+	IntelEmitInvarientState(scrn);
+	intel->last_3d = LAST_3D_VIDEO;
+
+	gen6_upload_invarient_states(scrn);
+	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
+	gen6_upload_viewport_state_pointers(scrn);
+	gen6_upload_urb(scrn);
+	gen6_upload_cc_state_pointers(scrn);
+	gen6_upload_sampler_state_pointers(scrn);
+	gen6_upload_vs_state(scrn);
+	gen6_upload_gs_state(scrn);
+	gen6_upload_clip_state(scrn);
+	gen6_upload_sf_state(scrn);
+	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
+	gen6_upload_binding_table(scrn, (n_src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));;
+	gen6_upload_depth_buffer_state(scrn);
+	gen6_upload_drawing_rectangle(scrn);
+	gen6_upload_vertex_element_state(scrn);
+}
+
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+			intel_adaptor_private *adaptor_priv, int id,
+			RegionPtr dstRegion,
+			short width, short height,
+			int video_pitch, int video_pitch2,
+			short src_w, short src_h,
+			short drw_w, short drw_h, PixmapPtr pixmap)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	BoxPtr pbox;
+	int nbox, dxo, dyo, pix_xoff, pix_yoff;
+	float src_scale_x, src_scale_y;
+	int src_surf;
+	int n_src_surf;
+	uint32_t src_surf_format;
+	uint32_t src_surf_base[6];
+	int src_width[6];
+	int src_height[6];
+	int src_pitch[6];
+	drm_intel_bo *surface_state_binding_table_bo;
+
+	src_surf_base[0] = adaptor_priv->YBufOffset;
+	src_surf_base[1] = adaptor_priv->YBufOffset;
+	src_surf_base[2] = adaptor_priv->VBufOffset;
+	src_surf_base[3] = adaptor_priv->VBufOffset;
+	src_surf_base[4] = adaptor_priv->UBufOffset;
+	src_surf_base[5] = adaptor_priv->UBufOffset;
+
+	if (is_planar_fourcc(id)) {
+		src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
+		src_width[1] = src_width[0] = width;
+		src_height[1] = src_height[0] = height;
+		src_pitch[1] = src_pitch[0] = video_pitch2;
+		src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+			width / 2;
+		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+			height / 2;
+		src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+			video_pitch;
+		n_src_surf = 6;
+	} else {
+		if (id == FOURCC_UYVY)
+			src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
+		else
+			src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+		src_width[0] = width;
+		src_height[0] = height;
+		src_pitch[0] = video_pitch;
+		n_src_surf = 1;
+	}
+
+	surface_state_binding_table_bo = 
+		drm_intel_bo_alloc(intel->bufmgr,
+				"surface state & binding table",
+				(n_src_surf + 1) * (ALIGN(sizeof(struct brw_surface_state), 32) + sizeof(uint32_t)),
+				4096);
+
+	if (!surface_state_binding_table_bo)
+		return;
+				
+	i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0);
+
+	for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+		i965_create_src_surface_state(scrn,
+					adaptor_priv->buf,
+					src_surf_base[src_surf],
+					src_width[src_surf],
+					src_height[src_surf],
+					src_pitch[src_surf],
+					src_surf_format,
+					surface_state_binding_table_bo,
+					(src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));
+	}
+
+	i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1);
+
+	if (!gen6_create_vidoe_objects(scrn)) {
+		drm_intel_bo_unreference(surface_state_binding_table_bo);
+		return;
+	}
+
+	/* Set up the offset for translating from the given region (in screen
+	 * coordinates) to the backing pixmap.
+	 */
+#ifdef COMPOSITE
+	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+	pix_xoff = 0;
+	pix_yoff = 0;
+#endif
+
+	dxo = dstRegion->extents.x1;
+	dyo = dstRegion->extents.y1;
+
+	/* Use normalized texture coordinates */
+	src_scale_x = ((float)src_w / width) / (float)drw_w;
+	src_scale_y = ((float)src_h / height) / (float)drw_h;
+
+	pbox = REGION_RECTS(dstRegion);
+	nbox = REGION_NUM_RECTS(dstRegion);
+	while (nbox--) {
+		int box_x1 = pbox->x1;
+		int box_y1 = pbox->y1;
+		int box_x2 = pbox->x2;
+		int box_y2 = pbox->y2;
+		int i;
+		drm_intel_bo *vb_bo;
+		float *vb;
+		drm_intel_bo *bo_table[] = {
+			NULL,	/* vb_bo */
+			intel->batch_bo,
+			surface_state_binding_table_bo,
+			intel->video.gen4_sampler_bo,
+			intel->video.wm_prog_packed_bo,
+			intel->video.wm_prog_planar_bo,
+			intel->video.gen4_cc_vp_bo,
+			intel->video.gen4_cc_bo,
+			intel->video.gen6_blend_bo,
+			intel->video.gen6_depth_stencil_bo,
+		};
+
+		pbox++;
+
+		if (intel_alloc_and_map(intel, "textured video vb", 4096,
+						&vb_bo, &vb) != 0)
+			break;
+		bo_table[0] = vb_bo;
+
+		i = 0;
+		vb[i++] = (box_x2 - dxo) * src_scale_x;
+		vb[i++] = (box_y2 - dyo) * src_scale_y;
+		vb[i++] = (float)box_x2 + pix_xoff;
+		vb[i++] = (float)box_y2 + pix_yoff;
+
+		vb[i++] = (box_x1 - dxo) * src_scale_x;
+		vb[i++] = (box_y2 - dyo) * src_scale_y;
+		vb[i++] = (float)box_x1 + pix_xoff;
+		vb[i++] = (float)box_y2 + pix_yoff;
+
+		vb[i++] = (box_x1 - dxo) * src_scale_x;
+		vb[i++] = (box_y1 - dyo) * src_scale_y;
+		vb[i++] = (float)box_x1 + pix_xoff;
+		vb[i++] = (float)box_y1 + pix_yoff;
+
+		drm_intel_bo_unmap(vb_bo);
+
+		/* If this command won't fit in the current batch, flush.
+		 * Assume that it does after being flushed.
+		 */
+		if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0)
+			intel_batch_submit(scrn, FALSE);
+
+		intel_batch_start_atomic(scrn, 200);
+		gen6_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf);
+
+		/* Set up the pointer to our vertex buffer */
+		OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2));
+		/* four 32-bit floats per vertex */
+		OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+			GEN6_VB0_VERTEXDATA | 
+			((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+		OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+		OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, i * 4);
+		OUT_BATCH(0);	/* reserved */
+
+		OUT_BATCH(BRW_3DPRIMITIVE | 
+			BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 
+			(_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 
+			(0 << 9) | /* Internal Vertex Count */
+			(6 - 2));
+		OUT_BATCH(3);	/* vertex count per instance */
+		OUT_BATCH(0);	/* start vertex offset */
+		OUT_BATCH(1);	/* single instance */
+		OUT_BATCH(0);	/* start instance location */
+		OUT_BATCH(0);	/* index buffer offset, ignored */
+
+		intel_batch_end_atomic(scrn);
+		drm_intel_bo_unreference(vb_bo);
+	}
+
+	/* release reference once we're finished */
+	drm_intel_bo_unreference(surface_state_binding_table_bo);
+	intel_debug_flush(scrn);
 }
diff --git a/src/intel.h b/src/intel.h
index 7604eee..eb68cb1 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -377,6 +377,10 @@  typedef struct intel_screen_private {
 		drm_intel_bo *gen4_cc_vp_bo;
 		drm_intel_bo *gen4_sampler_bo;
 		drm_intel_bo *gen4_sip_kernel_bo;
+		drm_intel_bo *wm_prog_packed_bo;
+		drm_intel_bo *wm_prog_planar_bo;
+		drm_intel_bo *gen6_blend_bo;
+		drm_intel_bo *gen6_depth_stencil_bo;
 	} video;
 
 	/* Render accel state */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index e7ca69d..01cb193 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -38,6 +38,7 @@ 
 #include "intel.h"
 #include "i830_reg.h"
 #include "i915_drm.h"
+#include "i965_reg.h"
 
 #define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */
 
@@ -146,14 +147,26 @@  void intel_batch_emit_flush(ScrnInfoPtr scrn)
 
 	assert (!intel->in_batch_atomic);
 
-	/* Big hammer, look to the pipelined flushes in future. */
-	flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
-	if (INTEL_INFO(intel)->gen >= 40)
+	if ((INTEL_INFO(intel)->gen >= 60)) {
+	    BEGIN_BATCH(4);
+	    OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+	    OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+		    BRW_PIPE_CONTROL_WC_FLUSH |
+		    BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		    BRW_PIPE_CONTROL_NOWRITE);
+	    OUT_BATCH(0); /* write address */
+	    OUT_BATCH(0); /* write data */
+	    ADVANCE_BATCH();
+	} else {
+	    /* Big hammer, look to the pipelined flushes in future. */
+	    flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+	    if (INTEL_INFO(intel)->gen >= 40)
 		flags = 0;
 
-	BEGIN_BATCH(1);
-	OUT_BATCH(MI_FLUSH | flags);
-	ADVANCE_BATCH();
+	    BEGIN_BATCH(1);
+	    OUT_BATCH(MI_FLUSH | flags);
+	    ADVANCE_BATCH();
+	}
 
 	intel_batch_do_flush(scrn);
 }
diff --git a/src/intel_video.h b/src/intel_video.h
index 5920d30..f405d40 100644
--- a/src/intel_video.h
+++ b/src/intel_video.h
@@ -81,6 +81,13 @@  void I965DisplayVideoTextured(ScrnInfoPtr scrn,
 			      short src_w, short src_h,
 			      short drw_w, short drw_h, PixmapPtr pixmap);
 
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+			      intel_adaptor_private *adaptor_priv,
+			      int id, RegionPtr dstRegion, short width,
+			      short height, int video_pitch, int video_pitch2,
+			      short src_w, short src_h,
+			      short drw_w, short drw_h, PixmapPtr pixmap);
+
 void i965_free_video(ScrnInfoPtr scrn);
 
 int is_planar_fourcc(int id);