From patchwork Wed Oct 27 02:17:31 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Xiang, Haihao" X-Patchwork-Id: 284512 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o9R2LpiS005685 for ; Wed, 27 Oct 2010 02:22:11 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id AB5789F002 for ; Tue, 26 Oct 2010 19:21:51 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by gabe.freedesktop.org (Postfix) with ESMTP id 5F14B9E79A for ; Tue, 26 Oct 2010 19:19:12 -0700 (PDT) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 26 Oct 2010 19:19:12 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.58,244,1286175600"; d="scan'208";a="620658581" Received: from xhh-ilk32.sh.intel.com (HELO localhost.localdomain) ([10.239.36.76]) by fmsmga002.fm.intel.com with ESMTP; 26 Oct 2010 19:19:11 -0700 From: "Xiang, Haihao" To: intel-gfx@lists.freedesktop.org Date: Wed, 27 Oct 2010 10:17:31 +0800 Message-Id: <1288145853-25252-5-git-send-email-haihao.xiang@intel.com> X-Mailer: git-send-email 1.7.0.4 In-Reply-To: <1288145853-25252-1-git-send-email-haihao.xiang@intel.com> References: <1288145853-25252-1-git-send-email-haihao.xiang@intel.com> Subject: [Intel-gfx] [PATH v2 4/6] Xv: setup pipeline for Xv on Sandybridge X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.11 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Wed, 27 Oct 2010 02:22:11 +0000 (UTC) diff --git a/src/brw_structs.h b/src/brw_structs.h index 1cee5bd..d089ba1 100644 --- a/src/brw_structs.h +++ b/src/brw_structs.h @@ -1487,4 +1487,104 @@ struct brw_interface_descriptor { } desc3; }; +struct gen6_blend_state +{ + struct { + unsigned int dest_blend_factor:5; + unsigned int source_blend_factor:5; + unsigned int pad3:1; + unsigned int blend_func:3; + unsigned int pad2:1; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_source_blend_factor:5; + unsigned int pad1:1; + unsigned int ia_blend_func:3; + unsigned int pad0:1; + unsigned int ia_blend_enable:1; + unsigned int blend_enable:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pad0:4; + unsigned int x_dither_offset:2; + unsigned int y_dither_offset:2; + unsigned int dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int pad1:1; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + unsigned int pad2:1; + unsigned int write_disable_b:1; + unsigned int write_disable_g:1; + unsigned int write_disable_r:1; + unsigned int write_disable_a:1; + unsigned int pad3:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned int alpha_test_format:1; + unsigned int pad0:14; + unsigned int round_disable:1; + unsigned int bf_stencil_ref:8; + unsigned int stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned int ui:8; + unsigned int pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } ds0; + + struct { + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + } ds1; + + struct { + unsigned int pad0:26; + unsigned int depth_write_enable:1; + unsigned int depth_test_func:3; + unsigned int pad1:1; + unsigned int depth_test_enable:1; + } ds2; +}; + #endif diff --git a/src/i965_reg.h b/src/i965_reg.h index fe419dc..3953dab 100644 --- a/src/i965_reg.h +++ b/src/i965_reg.h @@ -22,6 +22,10 @@ #define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0) #define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + #define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8) #define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9) #define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa) @@ -32,6 +36,9 @@ #define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2) #define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4) #define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5) +# define BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + #define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6) #define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7) #define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8) @@ -44,6 +51,91 @@ #define BRW_3DPRIMITIVE BRW_3D(3, 3, 0) +#define BRW_3DSTATE_CLEAR_PARAMS BRW_3D(3, 1, 0x10) +/* DW1 */ +# define BRW_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS BRW_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB BRW_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS BRW_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS BRW_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS BRW_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS BRW_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP BRW_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF BRW_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM BRW_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS BRW_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS BRW_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS BRW_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK BRW_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE BRW_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + #define PIPELINE_SELECT_3D 0 #define PIPELINE_SELECT_MEDIA 1 @@ -80,16 +172,22 @@ #define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) #define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) /* VERTEX_BUFFER_STATE Structure */ #define VB0_BUFFER_INDEX_SHIFT 27 +#define GEN6_VB0_BUFFER_INDEX_SHIFT 26 #define VB0_VERTEXDATA (0 << 26) #define VB0_INSTANCEDATA (1 << 26) +#define GEN6_VB0_VERTEXDATA (0 << 20) +#define GEN6_VB0_INSTANCEDATA (1 << 20) #define VB0_BUFFER_PITCH_SHIFT 0 /* VERTEX_ELEMENT_STATE Structure */ #define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ #define VE0_VALID (1 << 26) +#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */ #define VE0_FORMAT_SHIFT 16 #define VE0_OFFSET_SHIFT 0 #define VE1_VFCOMPONENT_0_SHIFT 28 diff --git a/src/i965_video.c b/src/i965_video.c index aaf10fa..235dfb9 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -134,6 +134,21 @@ static const uint32_t ps_kernel_planar_static_gen5[][4] = { #include "exa_wm_write.g4b.gen5" }; +/* programs for Sandybridge */ +static const uint32_t ps_kernel_packed_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + static uint32_t float_to_uint(float f) { union { @@ -1256,4 +1271,616 @@ void i965_free_video(ScrnInfoPtr scrn) intel->video.gen4_sampler_bo = NULL; drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo); intel->video.gen4_sip_kernel_bo = NULL; + drm_intel_bo_unreference(intel->video.wm_prog_packed_bo); + intel->video.wm_prog_packed_bo = NULL; + drm_intel_bo_unreference(intel->video.wm_prog_planar_bo); + intel->video.wm_prog_planar_bo = NULL; + drm_intel_bo_unreference(intel->video.gen6_blend_bo); + intel->video.gen6_blend_bo = NULL; + drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo); + intel->video.gen6_depth_stencil_bo = NULL; +} + +/* for GEN6+ */ +static drm_intel_bo * +gen6_create_cc_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_color_calc_state *cc_state; + drm_intel_bo *cc_bo; + + if (intel_alloc_and_map( + intel, + "textured video cc state", + sizeof(*cc_state), + &cc_bo, + &cc_state) != 0) + return NULL; + + cc_state->constant_r = 1.0; + cc_state->constant_g = 0.0; + cc_state->constant_b = 1.0; + cc_state->constant_a = 1.0; + + drm_intel_bo_unmap(cc_bo); + return cc_bo; +} + +static drm_intel_bo * +gen6_create_blend_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_blend_state *blend_state; + drm_intel_bo *blend_bo; + + if (intel_alloc_and_map( + intel, + "textured video blend state", + sizeof(*blend_state), + &blend_bo, + &blend_state) != 0) + return NULL; + + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + + drm_intel_bo_unmap(blend_bo); + return blend_bo; +} + +static drm_intel_bo * +gen6_create_depth_stencil_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_depth_stencil_state *depth_stencil_state; + drm_intel_bo *depth_stencil_bo; + + if (intel_alloc_and_map( + intel, + "textured video blend state", + sizeof(*depth_stencil_state), + &depth_stencil_bo, + &depth_stencil_state) != 0) + return NULL; + + drm_intel_bo_unmap(depth_stencil_bo); + return depth_stencil_bo; +} + +static Bool +gen6_create_vidoe_objects(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->video.gen4_sampler_bo == NULL) + intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn); + + if (intel->video.wm_prog_packed_bo == NULL) + intel->video.wm_prog_packed_bo = + i965_create_program(scrn, + &ps_kernel_packed_static_gen6[0][0], + sizeof(ps_kernel_packed_static_gen6)); + + if (intel->video.wm_prog_planar_bo == NULL) + intel->video.wm_prog_planar_bo = + i965_create_program(scrn, + &ps_kernel_planar_static_gen6[0][0], + sizeof(ps_kernel_planar_static_gen6)); + + if (intel->video.gen4_cc_vp_bo == NULL) + intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn); + + if (intel->video.gen4_cc_bo == NULL) + intel->video.gen4_cc_bo = gen6_create_cc_state(scrn); + + if (intel->video.gen6_blend_bo == NULL) + intel->video.gen6_blend_bo = gen6_create_blend_state(scrn); + + if (intel->video.gen6_depth_stencil_bo == NULL) + intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn); + + + return (intel->video.gen4_sampler_bo != NULL && + intel->video.wm_prog_packed_bo != NULL && + intel->video.wm_prog_planar_bo != NULL && + intel->video.gen4_cc_vp_bo != NULL && + intel->video.gen4_cc_bo != NULL && + intel->video.gen6_blend_bo != NULL && + intel->video.gen6_depth_stencil_bo != NULL); +} + +static void +gen6_upload_invarient_states(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH | + BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(1); + + /* Set system instruction pointer */ + OUT_BATCH(BRW_STATE_SIP | 0); + OUT_BATCH(0); +} + +static void +gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ +} + +static void +gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | + (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_RELOC(intel->video.gen4_cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +static void +gen6_upload_urb(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ +} + +static void +gen6_upload_cc_state_pointers(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_RELOC(intel->video.gen6_blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(intel->video.gen6_depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); +} + +static void +gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + OUT_RELOC(intel->video.gen4_sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +static void +gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Binding table pointers */ + OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | + GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + /* Only the PS uses the binding table */ + OUT_BATCH(ps_binding_table_offset); +} + +static void +gen6_upload_depth_buffer_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) | + (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(0); +} + +static void +gen6_upload_drawing_rectangle(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH((scrn->virtualX - 1) | (scrn->virtualY - 1) << 16); /* ymax, xmax */ + OUT_BATCH(0x00000000); /* yorigin, xorigin */ +} + +static void +gen6_upload_vs_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* disable VS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(0); /* without VS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_upload_gs_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* disable GS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(0); /* without GS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_upload_clip_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); +} + +static void +gen6_upload_sf_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) | + (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) | + (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW14 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW19 */ +} + +static void +gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + if (is_packed) { + OUT_RELOC(intel->video.wm_prog_packed_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } else { + OUT_RELOC(intel->video.wm_prog_planar_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } + OUT_BATCH(0); + OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ + OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE | + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_upload_vertex_element_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2)); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen6_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo, int n_src_surf) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(n_src_surf == 1 || n_src_surf == 6); + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_VIDEO; + + gen6_upload_invarient_states(scrn); + gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); + gen6_upload_viewport_state_pointers(scrn); + gen6_upload_urb(scrn); + gen6_upload_cc_state_pointers(scrn); + gen6_upload_sampler_state_pointers(scrn); + gen6_upload_vs_state(scrn); + gen6_upload_gs_state(scrn); + gen6_upload_clip_state(scrn); + gen6_upload_sf_state(scrn); + gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); + gen6_upload_binding_table(scrn, (n_src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));; + gen6_upload_depth_buffer_state(scrn); + gen6_upload_drawing_rectangle(scrn); + gen6_upload_vertex_element_state(scrn); +} + +void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, int id, + RegionPtr dstRegion, + short width, short height, + int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + BoxPtr pbox; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + int src_surf; + int n_src_surf; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + drm_intel_bo *surface_state_binding_table_bo; + + src_surf_base[0] = adaptor_priv->YBufOffset; + src_surf_base[1] = adaptor_priv->YBufOffset; + src_surf_base[2] = adaptor_priv->VBufOffset; + src_surf_base[3] = adaptor_priv->VBufOffset; + src_surf_base[4] = adaptor_priv->UBufOffset; + src_surf_base[5] = adaptor_priv->UBufOffset; + + if (is_planar_fourcc(id)) { + src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = width; + src_height[1] = src_height[0] = height; + src_pitch[1] = src_pitch[0] = video_pitch2; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + video_pitch; + n_src_surf = 6; + } else { + if (id == FOURCC_UYVY) + src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch; + n_src_surf = 1; + } + + surface_state_binding_table_bo = + drm_intel_bo_alloc(intel->bufmgr, + "surface state & binding table", + (n_src_surf + 1) * (ALIGN(sizeof(struct brw_surface_state), 32) + sizeof(uint32_t)), + 4096); + + if (!surface_state_binding_table_bo) + return; + + i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) { + i965_create_src_surface_state(scrn, + adaptor_priv->buf, + src_surf_base[src_surf], + src_width[src_surf], + src_height[src_surf], + src_pitch[src_surf], + src_surf_format, + surface_state_binding_table_bo, + (src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32)); + } + + i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); + + if (!gen6_create_vidoe_objects(scrn)) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / width) / (float)drw_w; + src_scale_y = ((float)src_h / height) / (float)drw_h; + + pbox = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + int i; + drm_intel_bo *vb_bo; + float *vb; + drm_intel_bo *bo_table[] = { + NULL, /* vb_bo */ + intel->batch_bo, + surface_state_binding_table_bo, + intel->video.gen4_sampler_bo, + intel->video.wm_prog_packed_bo, + intel->video.wm_prog_planar_bo, + intel->video.gen4_cc_vp_bo, + intel->video.gen4_cc_bo, + intel->video.gen6_blend_bo, + intel->video.gen6_depth_stencil_bo, + }; + + pbox++; + + if (intel_alloc_and_map(intel, "textured video vb", 4096, + &vb_bo, &vb) != 0) + break; + bo_table[0] = vb_bo; + + i = 0; + vb[i++] = (box_x2 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x2 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y1 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y1 + pix_yoff; + + drm_intel_bo_unmap(vb_bo); + + /* If this command won't fit in the current batch, flush. + * Assume that it does after being flushed. + */ + if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) + intel_batch_submit(scrn, FALSE); + + intel_batch_start_atomic(scrn, 200); + gen6_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf); + + /* Set up the pointer to our vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); + /* four 32-bit floats per vertex */ + OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, i * 4); + OUT_BATCH(0); /* reserved */ + + OUT_BATCH(BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | /* Internal Vertex Count */ + (6 - 2)); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); /* start vertex offset */ + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + intel_batch_end_atomic(scrn); + drm_intel_bo_unreference(vb_bo); + } + + /* release reference once we're finished */ + drm_intel_bo_unreference(surface_state_binding_table_bo); + intel_debug_flush(scrn); } diff --git a/src/intel.h b/src/intel.h index 7604eee..eb68cb1 100644 --- a/src/intel.h +++ b/src/intel.h @@ -377,6 +377,10 @@ typedef struct intel_screen_private { drm_intel_bo *gen4_cc_vp_bo; drm_intel_bo *gen4_sampler_bo; drm_intel_bo *gen4_sip_kernel_bo; + drm_intel_bo *wm_prog_packed_bo; + drm_intel_bo *wm_prog_planar_bo; + drm_intel_bo *gen6_blend_bo; + drm_intel_bo *gen6_depth_stencil_bo; } video; /* Render accel state */ diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index e7ca69d..01cb193 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -38,6 +38,7 @@ #include "intel.h" #include "i830_reg.h" #include "i915_drm.h" +#include "i965_reg.h" #define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */ @@ -146,14 +147,26 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) assert (!intel->in_batch_atomic); - /* Big hammer, look to the pipelined flushes in future. */ - flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE; - if (INTEL_INFO(intel)->gen >= 40) + if ((INTEL_INFO(intel)->gen >= 60)) { + BEGIN_BATCH(4); + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH | + BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } else { + /* Big hammer, look to the pipelined flushes in future. */ + flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE; + if (INTEL_INFO(intel)->gen >= 40) flags = 0; - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH | flags); - ADVANCE_BATCH(); + BEGIN_BATCH(1); + OUT_BATCH(MI_FLUSH | flags); + ADVANCE_BATCH(); + } intel_batch_do_flush(scrn); } diff --git a/src/intel_video.h b/src/intel_video.h index 5920d30..f405d40 100644 --- a/src/intel_video.h +++ b/src/intel_video.h @@ -81,6 +81,13 @@ void I965DisplayVideoTextured(ScrnInfoPtr scrn, short src_w, short src_h, short drw_w, short drw_h, PixmapPtr pixmap); +void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, + int id, RegionPtr dstRegion, short width, + short height, int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap); + void i965_free_video(ScrnInfoPtr scrn); int is_planar_fourcc(int id);