diff mbox

[22/90] assembler: Update the disassembler code

Message ID 1359991705-5254-23-git-send-email-damien.lespiau@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lespiau, Damien Feb. 4, 2013, 3:27 p.m. UTC
From Mesa. This imports a bit more the of brw_eu* infrastructure (which
is going towards the right direction!) from mesa and the update is quite
a significant improvement over what we had.

I also verified that the changes that were done on the assembler old
version of brw_disasm.c were already supported by the Mesa version, and
indeed they were.

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
---
 assembler/Makefile.am                |    4 +-
 assembler/{disasm.c => brw_disasm.c} |  660 +++++++++++++++++++++++-----
 assembler/brw_eu.h                   |  405 +++++++++++++++++
 assembler/brw_reg.h                  |  808 ++++++++++++++++++++++++++++++++++
 assembler/disasm-main.c              |   20 +-
 assembler/gen4asm.h                  |    3 -
 6 files changed, 1783 insertions(+), 117 deletions(-)
 rename assembler/{disasm.c => brw_disasm.c} (52%)
 create mode 100644 assembler/brw_eu.h
 create mode 100644 assembler/brw_reg.h
diff mbox

Patch

diff --git a/assembler/Makefile.am b/assembler/Makefile.am
index 5914356..8843e1a 100644
--- a/assembler/Makefile.am
+++ b/assembler/Makefile.am
@@ -11,6 +11,8 @@  gram.h: gram.c
 
 intel_gen4asm_SOURCES =	\
 	brw_defines.h	\
+	brw_eu.h	\
+	brw_reg.h	\
 	brw_structs.h	\
 	gen4asm.h	\
 	gram.y		\
@@ -19,7 +21,7 @@  intel_gen4asm_SOURCES =	\
 	$(NULL)
 
 intel_gen4disasm_SOURCES =  \
-	disasm.c disasm-main.c
+	brw_disasm.c disasm-main.c
 
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = intel-gen4asm.pc
diff --git a/assembler/disasm.c b/assembler/brw_disasm.c
similarity index 52%
rename from assembler/disasm.c
rename to assembler/brw_disasm.c
index 9ebbeab..8eaeb78 100644
--- a/assembler/disasm.c
+++ b/assembler/brw_disasm.c
@@ -28,13 +28,9 @@ 
 #include <stdarg.h>
 
 #include "gen4asm.h"
-#include "brw_defines.h"
+#include "brw_eu.h"
 
-struct {
-    char    *name;
-    int	    nsrc;
-    int	    ndst;
-} opcode[128] = {
+const struct opcode_desc opcode_descs[128] = {
     [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
     [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
     [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
@@ -48,13 +44,15 @@  struct {
     [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
     [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
-    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1},
+    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
 
     [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
@@ -71,12 +69,12 @@  struct {
     [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
     [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
     [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
-    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
     [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
-    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
@@ -87,8 +85,9 @@  struct {
     [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
     [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
 };
+static const struct opcode_desc *opcode = opcode_descs;
 
-char *conditional_modifier[16] = {
+static const char * const conditional_modifier[16] = {
     [BRW_CONDITIONAL_NONE] = "",
     [BRW_CONDITIONAL_Z] = ".e",
     [BRW_CONDITIONAL_NZ] = ".ne",
@@ -101,17 +100,17 @@  char *conditional_modifier[16] = {
     [BRW_CONDITIONAL_U] = ".u",
 };
 
-char *negate[2] = {
+static const char * const negate_op[2] = {
     [0] = "",
     [1] = "-",
 };
 
-char *_abs[2] = {
+static const char * const _abs[2] = {
     [0] = "",
     [1] = "(abs)",
 };
 
-char *vert_stride[16] = {
+static const char * const vert_stride[16] = {
     [0] = "0",
     [1] = "1",
     [2] = "2",
@@ -122,7 +121,7 @@  char *vert_stride[16] = {
     [15] = "VxH",
 };
 
-char *width[8] = {
+static const char * const width[8] = {
     [0] = "1",
     [1] = "2",
     [2] = "4",
@@ -130,34 +129,41 @@  char *width[8] = {
     [4] = "16",
 };
 
-char *horiz_stride[4] = {
+static const char * const horiz_stride[4] = {
     [0] = "0",
     [1] = "1",
     [2] = "2",
     [3] = "4"
 };
 
-char *chan_sel[4] = {
+static const char * const chan_sel[4] = {
     [0] = "x",
     [1] = "y",
     [2] = "z",
     [3] = "w",
 };
 
-char *dest_condmod[16] = {
-};
-
-char *debug_ctrl[2] = {
+static const char * const debug_ctrl[2] = {
     [0] = "",
     [1] = ".breakpoint"
 };
 
-char *saturate[2] = {
+static const char * const saturate[2] = {
     [0] = "",
     [1] = ".sat"
 };
 
-char *exec_size[8] = {
+static const char * const accwr[2] = {
+    [0] = "",
+    [1] = "AccWrEnable"
+};
+
+static const char * const wectrl[2] = {
+    [0] = "WE_normal",
+    [1] = "WE_all"
+};
+
+static const char * const exec_size[8] = {
     [0] = "1",
     [1] = "2",
     [2] = "4",
@@ -166,12 +172,12 @@  char *exec_size[8] = {
     [5] = "32"
 };
 
-char *pred_inv[2] = {
+static const char * const pred_inv[2] = {
     [0] = "+",
     [1] = "-"
 };
 
-char *pred_ctrl_align16[16] = {
+static const char * const pred_ctrl_align16[16] = {
     [1] = "",
     [2] = ".x",
     [3] = ".y",
@@ -181,7 +187,7 @@  char *pred_ctrl_align16[16] = {
     [7] = ".all4h",
 };
 
-char *pred_ctrl_align1[16] = {
+static const char * const pred_ctrl_align1[16] = {
     [1] = "",
     [2] = ".anyv",
     [3] = ".allv",
@@ -195,35 +201,36 @@  char *pred_ctrl_align1[16] = {
     [11] = ".all16h",
 };
 
-char *thread_ctrl[4] = {
+static const char * const thread_ctrl[4] = {
     [0] = "",
     [2] = "switch"
 };
 
-char *compr_ctrl[4] = {
+static const char * const compr_ctrl[4] = {
     [0] = "",
     [1] = "sechalf",
     [2] = "compr",
+    [3] = "compr4",
 };
 
-char *dep_ctrl[4] = {
+static const char * const dep_ctrl[4] = {
     [0] = "",
     [1] = "NoDDClr",
     [2] = "NoDDChk",
     [3] = "NoDDClr,NoDDChk",
 };
 
-char *mask_ctrl[4] = {
+static const char * const mask_ctrl[4] = {
     [0] = "",
     [1] = "nomask",
 };
 
-char *access_mode[2] = {
+static const char * const access_mode[2] = {
     [0] = "align1",
     [1] = "align16",
 };
 
-char *reg_encoding[8] = {
+static const char * const reg_encoding[8] = {
     [0] = "UD",
     [1] = "D",
     [2] = "UW",
@@ -233,24 +240,24 @@  char *reg_encoding[8] = {
     [7] = "F"
 };
 
-char *imm_encoding[8] = {
-    [0] = "UD",
-    [1] = "D",
-    [2] = "UW",
-    [3] = "W",
-    [5] = "VF",
-    [6] = "V",
-    [7] = "F"
+const int reg_type_size[8] = {
+    [0] = 4,
+    [1] = 4,
+    [2] = 2,
+    [3] = 2,
+    [4] = 1,
+    [5] = 1,
+    [7] = 4
 };
 
-char *reg_file[4] = {
+static const char * const reg_file[4] = {
     [0] = "A",
     [1] = "g",
     [2] = "m",
     [3] = "imm",
 };
 
-char *writemask[16] = {
+static const char * const writemask[16] = {
     [0x0] = ".",
     [0x1] = ".x",
     [0x2] = ".y",
@@ -269,12 +276,12 @@  char *writemask[16] = {
     [0xf] = "",
 };
 
-char *end_of_thread[2] = {
+static const char * const end_of_thread[2] = {
     [0] = "",
     [1] = "EOT"
 };
 
-char *target_function[16] = {
+static const char * const target_function[16] = {
     [BRW_SFID_NULL] = "null",
     [BRW_SFID_MATH] = "math",
     [BRW_SFID_SAMPLER] = "sampler",
@@ -285,7 +292,37 @@  char *target_function[16] = {
     [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
 };
 
-char *math_function[16] = {
+static const char * const target_function_gen6[16] = {
+    [BRW_SFID_NULL] = "null",
+    [BRW_SFID_MATH] = "math",
+    [BRW_SFID_SAMPLER] = "sampler",
+    [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+    [BRW_SFID_URB] = "urb",
+    [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
+    [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+    [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+    [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+    [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+static const char * const dp_rc_msg_type_gen6[16] = {
+    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
+static const char * const math_function[16] = {
     [BRW_MATH_FUNCTION_INV] = "inv",
     [BRW_MATH_FUNCTION_LOG] = "log",
     [BRW_MATH_FUNCTION_EXP] = "exp",
@@ -297,52 +334,57 @@  char *math_function[16] = {
     [BRW_MATH_FUNCTION_TAN] = "tan",
     [BRW_MATH_FUNCTION_POW] = "pow",
     [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
-    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
-    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
 };
 
-char *math_saturate[2] = {
+static const char * const math_saturate[2] = {
     [0] = "",
     [1] = "sat"
 };
 
-char *math_signed[2] = {
+static const char * const math_signed[2] = {
     [0] = "",
     [1] = "signed"
 };
 
-char *math_scalar[2] = {
+static const char * const math_scalar[2] = {
     [0] = "",
     [1] = "scalar"
 };
 
-char *math_precision[2] = {
+static const char * const math_precision[2] = {
     [0] = "",
     [1] = "partial_precision"
 };
 
-char *urb_swizzle[4] = {
+static const char * const urb_opcode[2] = {
+    [0] = "urb_write",
+    [1] = "ff_sync",
+};
+
+static const char * const urb_swizzle[4] = {
     [BRW_URB_SWIZZLE_NONE] = "",
     [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
     [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
 };
 
-char *urb_allocate[2] = {
+static const char * const urb_allocate[2] = {
     [0] = "",
     [1] = "allocate"
 };
 
-char *urb_used[2] = {
+static const char * const urb_used[2] = {
     [0] = "",
     [1] = "used"
 };
 
-char *urb_complete[2] = {
+static const char * const urb_complete[2] = {
     [0] = "",
     [1] = "complete"
 };
 
-char *sampler_target_format[4] = {
+static const char * const sampler_target_format[4] = {
     [0] = "F",
     [2] = "UD",
     [3] = "D"
@@ -351,20 +393,21 @@  char *sampler_target_format[4] = {
 
 static int column;
 
-static int string (FILE *file, char *string)
+static int string (FILE *file, const char *string)
 {
     fputs (string, file);
     column += strlen (string);
     return 0;
 }
 
-static int format (FILE *f, char *format, ...)
+static int format (FILE *f, const char *format, ...)
 {
     char    buf[1024];
     va_list	args;
     va_start (args, format);
 
     vsnprintf (buf, sizeof (buf) - 1, format, args);
+    va_end (args);
     string (f, buf);
     return 0;
 }
@@ -384,7 +427,8 @@  static int pad (FILE *f, int c)
     return 0;
 }
 
-static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+static int control (FILE *file, const char *name, const char * const ctrl[],
+                    GLuint id, int *space)
 {
     if (!ctrl[id]) {
 	fprintf (file, "*** invalid %s value %d ",
@@ -415,6 +459,11 @@  static int print_opcode (FILE *file, int id)
 static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
 {
     int	err = 0;
+
+    /* Clear the Compr4 instruction compression bit. */
+    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+       _reg_nr &= ~(1 << 7);
+
     if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
 	switch (_reg_nr & 0xf0) {
 	case BRW_ARF_NULL:
@@ -426,6 +475,9 @@  static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
 	case BRW_ARF_ACCUMULATOR:
 	    format (file, "acc%d", _reg_nr & 0x0f);
 	    break;
+	case BRW_ARF_FLAG:
+	    format (file, "f%d", _reg_nr & 0x0f);
+	    break;
 	case BRW_ARF_MASK:
 	    format (file, "mask%d", _reg_nr & 0x0f);
 	    break;
@@ -468,7 +520,8 @@  static int dest (FILE *file, struct brw_instruction *inst)
 	    if (err == -1)
 		return 0;
 	    if (inst->bits1.da1.dest_subreg_nr)
-		format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+		format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+				     reg_type_size[inst->bits1.da1.dest_reg_type]);
 	    format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
 	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
 	}
@@ -476,7 +529,8 @@  static int dest (FILE *file, struct brw_instruction *inst)
 	{
 	    string (file, "g[a0");
 	    if (inst->bits1.ia1.dest_subreg_nr)
-		format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+		format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+					reg_type_size[inst->bits1.ia1.dest_reg_type]);
 	    if (inst->bits1.ia1.dest_indirect_offset)
 		format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
 	    string (file, "]");
@@ -492,7 +546,8 @@  static int dest (FILE *file, struct brw_instruction *inst)
 	    if (err == -1)
 		return 0;
 	    if (inst->bits1.da16.dest_subreg_nr)
-		format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+		format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+				     reg_type_size[inst->bits1.da16.dest_reg_type]);
 	    string (file, "<1>");
 	    err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
 	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -507,6 +562,28 @@  static int dest (FILE *file, struct brw_instruction *inst)
     return 0;
 }
 
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{
+    int	err = 0;
+    uint32_t reg_file;
+
+    if (inst->bits1.da3src.dest_reg_file)
+       reg_file = BRW_MESSAGE_REGISTER_FILE;
+    else
+       reg_file = BRW_GENERAL_REGISTER_FILE;
+
+    err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
+    if (err == -1)
+       return 0;
+    if (inst->bits1.da3src.dest_subreg_nr)
+       format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+    string (file, "<1>");
+    err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+    err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);
+
+    return 0;
+}
+
 static int src_align1_region (FILE *file,
 			      GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
 {
@@ -526,14 +603,14 @@  static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
 		    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
 {
     int err = 0;
-    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "negate", negate_op, _negate, NULL);
     err |= control (file, "abs", _abs, __abs, NULL);
 
     err |= reg (file, _reg_file, reg_num);
     if (err == -1)
 	return 0;
     if (sub_reg_num)
-	format (file, ".%d", sub_reg_num);
+	format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
     src_align1_region (file, _vert_stride, _width, _horiz_stride);
     err |= control (file, "src reg encoding", reg_encoding, type, NULL);
     return err;
@@ -552,7 +629,7 @@  static int src_ia1 (FILE *file,
 		    GLuint _vert_stride)
 {
     int err = 0;
-    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "negate", negate_op, _negate, NULL);
     err |= control (file, "abs", _abs, __abs, NULL);
 
     string (file, "g[a0");
@@ -580,18 +657,120 @@  static int src_da16 (FILE *file,
 		     GLuint swz_w)
 {
     int err = 0;
-    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "negate", negate_op, _negate, NULL);
     err |= control (file, "abs", _abs, __abs, NULL);
 
     err |= reg (file, _reg_file, _reg_nr);
     if (err == -1)
 	return 0;
     if (_subreg_nr)
-	format (file, ".%d", _subreg_nr);
+	/* bit4 for subreg number byte addressing. Make this same meaning as
+	   in da1 case, so output looks consistent. */
+	format (file, ".%d", 16 / reg_type_size[_reg_type]);
     string (file, "<");
     err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
-    string (file, ",1,1>");
+    string (file, ",4,1>");
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all	 - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+	swz_y == BRW_CHANNEL_Y &&
+	swz_z == BRW_CHANNEL_Z &&
+	swz_w == BRW_CHANNEL_W)
+    {
+	;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+	err |= control (file, "channel select", chan_sel, swz_y, NULL);
+	err |= control (file, "channel select", chan_sel, swz_z, NULL);
+	err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
     err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+    return err;
+}
+
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
+    GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;
+
+    err |= control (file, "negate", negate_op, inst->bits1.da3src.src0_negate, NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+    if (err == -1)
+	return 0;
+    if (inst->bits2.da3src.src0_subreg_nr)
+	format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+		    BRW_REGISTER_TYPE_F, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all	 - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+	swz_y == BRW_CHANNEL_Y &&
+	swz_z == BRW_CHANNEL_Z &&
+	swz_w == BRW_CHANNEL_W)
+    {
+	;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+	err |= control (file, "channel select", chan_sel, swz_y, NULL);
+	err |= control (file, "channel select", chan_sel, swz_z, NULL);
+	err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
+    GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
+    GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
+			     (inst->bits3.da3src.src1_subreg_nr_high << 2));
+
+    err |= control (file, "negate", negate_op, inst->bits1.da3src.src1_negate,
+		    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+		inst->bits3.da3src.src1_reg_nr);
+    if (err == -1)
+	return 0;
+    if (src1_subreg_nr)
+	format (file, ".%d", src1_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+		    BRW_REGISTER_TYPE_F, NULL);
     /*
      * Three kinds of swizzle display:
      *  identity - nothing printed
@@ -622,6 +801,56 @@  static int src_da16 (FILE *file,
 }
 
 
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
+    GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
+    GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
+    GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;
+
+    err |= control (file, "negate", negate_op, inst->bits1.da3src.src2_negate,
+		    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+		inst->bits3.da3src.src2_reg_nr);
+    if (err == -1)
+	return 0;
+    if (inst->bits3.da3src.src2_subreg_nr)
+	format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+		    BRW_REGISTER_TYPE_F, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all	 - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+	swz_y == BRW_CHANNEL_Y &&
+	swz_z == BRW_CHANNEL_Z &&
+	swz_w == BRW_CHANNEL_W)
+    {
+	;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+	err |= control (file, "channel select", chan_sel, swz_y, NULL);
+	err |= control (file, "channel select", chan_sel, swz_z, NULL);
+	err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
 static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
     switch (type) {
     case BRW_REGISTER_TYPE_UD:
@@ -771,7 +1000,45 @@  static int src1 (FILE *file, struct brw_instruction *inst)
     }
 }
 
-int disasm (FILE *file, struct brw_instruction *inst)
+int esize[6] = {
+	[0] = 1,
+	[1] = 2,
+	[2] = 4,
+	[3] = 8,
+	[4] = 16,
+	[5] = 32,
+};
+
+static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
+{
+    int qtr_ctl = inst->header.compression_control;
+    int exec_size = esize[inst->header.execution_size];
+
+    if (exec_size == 8) {
+	switch (qtr_ctl) {
+	case 0:
+	    string (file, " 1Q");
+	    break;
+	case 1:
+	    string (file, " 2Q");
+	    break;
+	case 2:
+	    string (file, " 3Q");
+	    break;
+	case 3:
+	    string (file, " 4Q");
+	    break;
+	}
+    } else if (exec_size == 16){
+	if (qtr_ctl < 2)
+	    string (file, " 1H");
+	else
+	    string (file, " 2H");
+    }
+    return 0;
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 {
     int	err = 0;
     int space = 0;
@@ -779,7 +1046,7 @@  int disasm (FILE *file, struct brw_instruction *inst)
     if (inst->header.predicate_control) {
 	string (file, "(");
 	err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
-	format (file, "f%d", inst->bits2.da1.flag_reg_nr);
+	format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
 	if (inst->bits2.da1.flag_subreg_nr)
 	    format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
 	if (inst->header.access_mode == BRW_ALIGN_1)
@@ -795,42 +1062,106 @@  int disasm (FILE *file, struct brw_instruction *inst)
     err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
     err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
 
-    if (inst->header.opcode != BRW_OPCODE_SEND &&
-	inst->header.opcode != BRW_OPCODE_SENDC)
+    if (inst->header.opcode == BRW_OPCODE_MATH) {
+	string (file, " ");
+	err |= control (file, "function", math_function,
+			inst->header.destreg__conditionalmod, NULL);
+    } else if (inst->header.opcode != BRW_OPCODE_SEND &&
+	       inst->header.opcode != BRW_OPCODE_SENDC) {
 	err |= control (file, "conditional modifier", conditional_modifier,
 			inst->header.destreg__conditionalmod, NULL);
 
+        /* If we're using the conditional modifier, print which flags reg is
+         * used for it.  Note that on gen6+, the embedded-condition SEL and
+         * control flow doesn't update flags.
+         */
+	if (inst->header.destreg__conditionalmod &&
+            (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
+                         inst->header.opcode != BRW_OPCODE_IF &&
+                         inst->header.opcode != BRW_OPCODE_WHILE))) {
+	    format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+	    if (inst->bits2.da1.flag_subreg_nr)
+		format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+        }
+    }
+
     if (inst->header.opcode != BRW_OPCODE_NOP) {
 	string (file, "(");
 	err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
 	string (file, ")");
     }
 
-    if (inst->header.opcode == BRW_OPCODE_SEND ||
-	inst->header.opcode == BRW_OPCODE_SENDC)
+    if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
 	format (file, " %d", inst->header.destreg__conditionalmod);
 
-    if (opcode[inst->header.opcode].ndst > 0) {
-	pad (file, 16);
-	err |= dest (file, inst);
-    }
-    if (opcode[inst->header.opcode].nsrc > 0) {
-	pad (file, 32);
-	err |= src0 (file, inst);
-    }
-    if (opcode[inst->header.opcode].nsrc > 1) {
-	pad (file, 48);
-	err |= src1 (file, inst);
+    if (opcode[inst->header.opcode].nsrc == 3) {
+       pad (file, 16);
+       err |= dest_3src (file, inst);
+
+       pad (file, 32);
+       err |= src0_3src (file, inst);
+
+       pad (file, 48);
+       err |= src1_3src (file, inst);
+
+       pad (file, 64);
+       err |= src2_3src (file, inst);
+    } else {
+       if (opcode[inst->header.opcode].ndst > 0) {
+	  pad (file, 16);
+	  err |= dest (file, inst);
+       } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
+			       inst->header.opcode == BRW_OPCODE_ENDIF ||
+			       inst->header.opcode == BRW_OPCODE_WHILE)) {
+	  format (file, " %d", inst->bits3.break_cont.jip);
+       } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+			       inst->header.opcode == BRW_OPCODE_ELSE ||
+			       inst->header.opcode == BRW_OPCODE_ENDIF ||
+			       inst->header.opcode == BRW_OPCODE_WHILE)) {
+	  format (file, " %d", inst->bits1.branch_gen6.jump_count);
+       } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
+                                inst->header.opcode == BRW_OPCODE_CONTINUE ||
+                                inst->header.opcode == BRW_OPCODE_HALT)) ||
+                  (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
+	  format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+       } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+	  format (file, " %d", inst->bits3.d);
+       }
+
+       if (opcode[inst->header.opcode].nsrc > 0) {
+	  pad (file, 32);
+	  err |= src0 (file, inst);
+       }
+       if (opcode[inst->header.opcode].nsrc > 1) {
+	  pad (file, 48);
+	  err |= src1 (file, inst);
+       }
     }
 
     if (inst->header.opcode == BRW_OPCODE_SEND ||
 	inst->header.opcode == BRW_OPCODE_SENDC) {
+	enum brw_message_target target;
+
+	if (gen >= 6)
+	    target = inst->header.destreg__conditionalmod;
+	else if (gen == 5)
+	    target = inst->bits2.send_gen5.sfid;
+	else
+	    target = inst->bits3.generic.msg_target;
+
 	newline (file);
 	pad (file, 16);
 	space = 0;
-	err |= control (file, "target function", target_function,
-			inst->header.destreg__conditionalmod, &space);
-	switch (inst->header.destreg__conditionalmod) {
+
+	if (gen >= 6) {
+	   err |= control (file, "target function", target_function_gen6,
+			   target, &space);
+	} else {
+	   err |= control (file, "target function", target_function,
+			   target, &space);
+	}
+
+	switch (target) {
 	case BRW_SFID_MATH:
 	    err |= control (file, "math function", math_function,
 			    inst->bits3.math.function, &space);
@@ -844,24 +1175,98 @@  int disasm (FILE *file, struct brw_instruction *inst)
 			    inst->bits3.math.precision, &space);
 	    break;
 	case BRW_SFID_SAMPLER:
-	    format (file, " (%d, %d, ",
-		    inst->bits3.sampler.binding_table_index,
-		    inst->bits3.sampler.sampler);
-	    err |= control (file, "sampler target format", sampler_target_format,
-			    inst->bits3.sampler.return_format, NULL);
-	    string (file, ")");
+	    if (gen >= 7) {
+		format (file, " (%d, %d, %d, %d)",
+			inst->bits3.sampler_gen7.binding_table_index,
+			inst->bits3.sampler_gen7.sampler,
+			inst->bits3.sampler_gen7.msg_type,
+			inst->bits3.sampler_gen7.simd_mode);
+	    } else if (gen >= 5) {
+		format (file, " (%d, %d, %d, %d)",
+			inst->bits3.sampler_gen5.binding_table_index,
+			inst->bits3.sampler_gen5.sampler,
+			inst->bits3.sampler_gen5.msg_type,
+			inst->bits3.sampler_gen5.simd_mode);
+	    } else if (0 /* FINISHME: is_g4x */) {
+		format (file, " (%d, %d)",
+			inst->bits3.sampler_g4x.binding_table_index,
+			inst->bits3.sampler_g4x.sampler);
+	    } else {
+		format (file, " (%d, %d, ",
+			inst->bits3.sampler.binding_table_index,
+			inst->bits3.sampler.sampler);
+		err |= control (file, "sampler target format",
+				sampler_target_format,
+				inst->bits3.sampler.return_format, NULL);
+		string (file, ")");
+	    }
+	    break;
+	case BRW_SFID_DATAPORT_READ:
+	    if (gen >= 6) {
+		format (file, " (%d, %d, %d, %d)",
+			inst->bits3.gen6_dp.binding_table_index,
+			inst->bits3.gen6_dp.msg_control,
+			inst->bits3.gen6_dp.msg_type,
+			inst->bits3.gen6_dp.send_commit_msg);
+	    } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+		format (file, " (%d, %d, %d)",
+			inst->bits3.dp_read_gen5.binding_table_index,
+			inst->bits3.dp_read_gen5.msg_control,
+			inst->bits3.dp_read_gen5.msg_type);
+	    } else {
+		format (file, " (%d, %d, %d)",
+			inst->bits3.dp_read.binding_table_index,
+			inst->bits3.dp_read.msg_control,
+			inst->bits3.dp_read.msg_type);
+	    }
 	    break;
+
 	case BRW_SFID_DATAPORT_WRITE:
-	    format (file, " (%d, %d, %d, %d)",
-		    inst->bits3.dp_write.binding_table_index,
-		    (inst->bits3.dp_write.last_render_target << 3) |
-		    inst->bits3.dp_write.msg_control,
-		    inst->bits3.dp_write.msg_type,
-		    inst->bits3.dp_write.send_commit_msg);
+	    if (gen >= 7) {
+		format (file, " (");
+
+		err |= control (file, "DP rc message type",
+				dp_rc_msg_type_gen6,
+				inst->bits3.gen7_dp.msg_type, &space);
+
+		format (file, ", %d, %d, %d)",
+			inst->bits3.gen7_dp.binding_table_index,
+			inst->bits3.gen7_dp.msg_control,
+			inst->bits3.gen7_dp.msg_type);
+	    } else if (gen == 6) {
+		format (file, " (");
+
+		err |= control (file, "DP rc message type",
+				dp_rc_msg_type_gen6,
+				inst->bits3.gen6_dp.msg_type, &space);
+
+		format (file, ", %d, %d, %d, %d)",
+			inst->bits3.gen6_dp.binding_table_index,
+			inst->bits3.gen6_dp.msg_control,
+			inst->bits3.gen6_dp.msg_type,
+			inst->bits3.gen6_dp.send_commit_msg);
+	    } else {
+		format (file, " (%d, %d, %d, %d)",
+			inst->bits3.dp_write.binding_table_index,
+			(inst->bits3.dp_write.last_render_target << 3) |
+			inst->bits3.dp_write.msg_control,
+			inst->bits3.dp_write.msg_type,
+			inst->bits3.dp_write.send_commit_msg);
+	    }
 	    break;
+
 	case BRW_SFID_URB:
-	    format (file, " %d", inst->bits3.urb.offset);
+	    if (gen >= 5) {
+		format (file, " %d", inst->bits3.urb_gen5.offset);
+	    } else {
+		format (file, " %d", inst->bits3.urb.offset);
+	    }
+
 	    space = 1;
+	    if (gen >= 5) {
+		err |= control (file, "urb opcode", urb_opcode,
+				inst->bits3.urb_gen5.opcode, &space);
+	    }
 	    err |= control (file, "urb swizzle", urb_swizzle,
 			    inst->bits3.urb.swizzle_control, &space);
 	    err |= control (file, "urb allocate", urb_allocate,
@@ -873,27 +1278,62 @@  int disasm (FILE *file, struct brw_instruction *inst)
 	    break;
 	case BRW_SFID_THREAD_SPAWNER:
 	    break;
+	case GEN7_SFID_DATAPORT_DATA_CACHE:
+	    format (file, " (%d, %d, %d)",
+		    inst->bits3.gen7_dp.binding_table_index,
+		    inst->bits3.gen7_dp.msg_control,
+		    inst->bits3.gen7_dp.msg_type);
+	    break;
+
+
 	default:
-	    format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+	    format (file, "unsupported target %d", target);
 	    break;
 	}
 	if (space)
 	    string (file, " ");
-	format (file, "mlen %d",
-		inst->bits3.generic.msg_length);
-	format (file, " rlen %d",
-		inst->bits3.generic.response_length);
+	if (gen >= 5) {
+	   format (file, "mlen %d",
+		   inst->bits3.generic_gen5.msg_length);
+	   format (file, " rlen %d",
+		   inst->bits3.generic_gen5.response_length);
+	} else {
+	   format (file, "mlen %d",
+		   inst->bits3.generic.msg_length);
+	   format (file, " rlen %d",
+		   inst->bits3.generic.response_length);
+	}
     }
     pad (file, 64);
     if (inst->header.opcode != BRW_OPCODE_NOP) {
 	string (file, "{");
 	space = 1;
 	err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
-	err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+	if (gen >= 6)
+	    err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+	else
+	    err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
 	err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
-	err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+
+	if (gen >= 6)
+	    err |= qtr_ctrl (file, inst);
+	else {
+	    if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+		opcode[inst->header.opcode].ndst > 0 &&
+		inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+		inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+		format (file, " compr4");
+	    } else {
+		err |= control (file, "compression control", compr_ctrl,
+				inst->header.compression_control, &space);
+	    }
+	}
+
 	err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
-	if (inst->header.opcode == BRW_OPCODE_SEND)
+	if (gen >= 6)
+	    err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+	if (inst->header.opcode == BRW_OPCODE_SEND ||
+	    inst->header.opcode == BRW_OPCODE_SENDC)
 	    err |= control (file, "end of thread", end_of_thread,
 			    inst->bits3.generic.end_of_thread, &space);
 	if (space)
diff --git a/assembler/brw_eu.h b/assembler/brw_eu.h
new file mode 100644
index 0000000..b7009ff
--- /dev/null
+++ b/assembler/brw_eu.h
@@ -0,0 +1,405 @@ 
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include <stdbool.h>
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "brw_reg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BRW_EU_MAX_INSN_STACK 5
+
+struct brw_compile {
+   struct brw_instruction *store;
+   int store_size;
+   GLuint nr_insn;
+   unsigned int next_insn_offset;
+
+   void *mem_ctx;
+
+   /* Allow clients to push/pop instruction state:
+    */
+   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+   bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+   struct brw_instruction *current;
+
+   GLuint flag_value;
+   bool single_program_flow;
+   bool compressed;
+   struct brw_context *brw;
+
+   /* Control flow stacks:
+    * - if_stack contains IF and ELSE instructions which must be patched
+    *   (and popped) once the matching ENDIF instruction is encountered.
+    *
+    *   Just store the instruction pointer(an index).
+    */
+   int *if_stack;
+   int if_stack_depth;
+   int if_stack_array_size;
+
+   /**
+    * loop_stack contains the instruction pointers of the starts of loops which
+    * must be patched (and popped) once the matching WHILE instruction is
+    * encountered.
+    */
+   int *loop_stack;
+   /**
+    * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
+    * blocks they were popping out of, to fix up the mask stack.  This tracks
+    * the IF/ENDIF nesting in each current nested loop level.
+    */
+   int *if_depth_in_loop;
+   int loop_stack_depth;
+   int loop_stack_array_size;
+};
+
+static inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, bool enable );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
+
+void brw_init_compile(struct brw_context *, struct brw_compile *p,
+		      void *mem_ctx);
+void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg reg);
+
+void gen6_resolve_implied_move(struct brw_compile *p,
+			       struct brw_reg *src,
+			       GLuint msg_reg_nr);
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0);
+
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1);
+
+#define ALU3(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1,			\
+	      struct brw_reg src2);
+
+#define ROUND(OP) \
+void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(AVG)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+ALU3(MAD)
+
+ROUND(RNDZ)
+ROUND(RNDE)
+
+#undef ALU1
+#undef ALU2
+#undef ALU3
+#undef ROUND
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_set_sampler_message(struct brw_compile *p,
+                             struct brw_instruction *insn,
+                             GLuint binding_table_index,
+                             GLuint sampler,
+                             GLuint msg_type,
+                             GLuint response_length,
+                             GLuint msg_length,
+                             GLuint header_present,
+                             GLuint simd_mode,
+                             GLuint return_format);
+
+void brw_set_dp_read_message(struct brw_compile *p,
+			     struct brw_instruction *insn,
+			     GLuint binding_table_index,
+			     GLuint msg_control,
+			     GLuint msg_type,
+			     GLuint target_cache,
+			     GLuint msg_length,
+                             bool header_present,
+			     GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+			      struct brw_instruction *insn,
+			      GLuint binding_table_index,
+			      GLuint msg_control,
+			      GLuint msg_type,
+			      GLuint msg_length,
+			      bool header_present,
+			      GLuint last_render_target,
+			      GLuint response_length,
+			      GLuint end_of_thread,
+			      GLuint send_commit_msg);
+
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   bool allocate,
+		   bool used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   bool eot,
+		   bool writes_complete,
+		   GLuint offset,
+		   GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   bool allocate,
+		   GLuint response_length,
+		   bool eot);
+
+void brw_svb_write(struct brw_compile *p,
+                   struct brw_reg dest,
+                   GLuint msg_reg_nr,
+                   struct brw_reg src0,
+                   GLuint binding_table_index,
+                   bool   send_commit_msg);
+
+void brw_fb_WRITE(struct brw_compile *p,
+		  int dispatch_width,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLuint msg_control,
+		   GLuint binding_table_index,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   bool eot,
+		   bool header_present);
+
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index,
+		GLuint sampler,
+		GLuint writemask,
+		GLuint msg_type,
+		GLuint response_length,
+		GLuint msg_length,
+		GLuint header_present,
+		GLuint simd_mode,
+		GLuint return_format);
+
+void brw_math( struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       GLuint msg_reg_nr,
+	       struct brw_reg src,
+	       GLuint data_type,
+	       GLuint precision );
+
+void brw_math2(struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       struct brw_reg src0,
+	       struct brw_reg src1);
+
+void brw_oword_block_read(struct brw_compile *p,
+			  struct brw_reg dest,
+			  struct brw_reg mrf,
+			  uint32_t offset,
+			  uint32_t bind_table_index);
+
+void brw_oword_block_read_scratch(struct brw_compile *p,
+				  struct brw_reg dest,
+				  struct brw_reg mrf,
+				  int num_regs,
+				  GLuint offset);
+
+void brw_oword_block_write_scratch(struct brw_compile *p,
+				   struct brw_reg mrf,
+				   int num_regs,
+				   GLuint offset);
+
+void brw_shader_time_add(struct brw_compile *p,
+                         int mrf,
+                         uint32_t surf_index);
+
+/* If/else/endif.  Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, 
+			       GLuint execute_size);
+struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
+				struct brw_reg src0, struct brw_reg src1);
+
+void brw_ELSE(struct brw_compile *p);
+void brw_ENDIF(struct brw_compile *p);
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+			       GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+void brw_WAIT(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+	     struct brw_reg dest,
+	     GLuint conditional,
+	     struct brw_reg src0,
+	     struct brw_reg src1);
+
+/*********************************************************************** 
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr,
+				   GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+			    struct brw_reg dst,
+			    struct brw_indirect ptr,
+			    GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_math_invert( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg src);
+
+void brw_set_src1(struct brw_compile *p,
+		  struct brw_instruction *insn,
+		  struct brw_reg reg);
+
+void brw_set_uip_jip(struct brw_compile *p);
+
+uint32_t brw_swap_cmod(uint32_t cmod);
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
+
+/* brw_disasm.c */
+struct opcode_desc {
+    char    *name;
+    int	    nsrc;
+    int	    ndst;
+};
+
+extern const struct opcode_desc opcode_descs[128];
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/assembler/brw_reg.h b/assembler/brw_reg.h
new file mode 100644
index 0000000..f225915
--- /dev/null
+++ b/assembler/brw_reg.h
@@ -0,0 +1,808 @@ 
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/** @file brw_reg.h
+ *
+ * This file defines struct brw_reg, which is our representation for EU
+ * registers.  They're not a hardware specific format, just an abstraction
+ * that intends to capture the full flexibility of the hardware registers.
+ *
+ * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
+ * the abstract brw_reg type into the actual hardware instruction encoding.
+ */
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#include <stdbool.h>
+#include <assert.h>
+#include "brw_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/**
+ * First GRF used for the MRF hack.
+ *
+ * On gen7, MRFs are no longer used, and contiguous GRFs are used instead.  We
+ * haven't converted our compiler to be aware of this, so it asks for MRFs and
+ * brw_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
+ * register allocators have to be careful of this to avoid corrupting the "MRF"s
+ * with actual GRF allocations.
+ */
+#define GEN7_MRF_HACK_START 112
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+
+static inline bool
+brw_is_single_value_swizzle(int swiz)
+{
+   return (swiz == BRW_SWIZZLE_XXXX ||
+           swiz == BRW_SWIZZLE_YYYY ||
+           swiz == BRW_SWIZZLE_ZZZZ ||
+           swiz == BRW_SWIZZLE_WWWW);
+}
+
+#define BRW_WRITEMASK_X 0x1
+#define BRW_WRITEMASK_Y 0x2
+#define BRW_WRITEMASK_Z 0x4
+#define BRW_WRITEMASK_W 0x8
+
+#define BRW_WRITEMASK_XY (BRW_WRITEMASK_X | BRW_WRITEMASK_Y)
+#define BRW_WRITEMASK_XZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XW (BRW_WRITEMASK_X | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_YW (BRW_WRITEMASK_Y | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_ZW (BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_XYZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XYZW (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | \
+                            BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+
+#define REG_SIZE (8*4)
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges.  Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg {
+   unsigned type:4;
+   unsigned file:2;
+   unsigned nr:8;
+   unsigned subnr:5;              /* :1 in align16 */
+   unsigned negate:1;             /* source only */
+   unsigned abs:1;                /* source only */
+   unsigned vstride:4;            /* source only */
+   unsigned width:3;              /* src only, align1 only */
+   unsigned hstride:2;            /* align1 only */
+   unsigned address_mode:1;       /* relative addressing, hopefully! */
+   unsigned pad0:1;
+
+   union {
+      struct {
+         unsigned swizzle:8;      /* src only, align16 only */
+         unsigned writemask:4;    /* dest only, align16 only */
+         int  indirect_offset:10; /* relative addressing offset */
+         unsigned pad1:10;        /* two dwords total */
+      } bits;
+
+      float f;
+      int   d;
+      unsigned ud;
+   } dw1;
+};
+
+
+struct brw_indirect {
+   unsigned addr_subnr:4;
+   int addr_offset:10;
+   unsigned pad:18;
+};
+
+
+static inline int
+type_sz(unsigned type)
+{
+   switch(type) {
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_F:
+      return 4;
+   case BRW_REGISTER_TYPE_HF:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_W:
+      return 2;
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_B:
+      return 1;
+   default:
+      return 0;
+   }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file      one of the BRW_x_REGISTER_FILE values
+ * \param nr        register number/index
+ * \param subnr     register sub number
+ * \param type      one of BRW_REGISTER_TYPE_x
+ * \param vstride   one of BRW_VERTICAL_STRIDE_x
+ * \param width     one of BRW_WIDTH_x
+ * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle   one of BRW_SWIZZLE_x
+ * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
+ */
+static inline struct brw_reg
+brw_reg(unsigned file,
+        unsigned nr,
+        unsigned subnr,
+        unsigned type,
+        unsigned vstride,
+        unsigned width,
+        unsigned hstride,
+        unsigned swizzle,
+        unsigned writemask)
+{
+   struct brw_reg reg;
+   if (file == BRW_GENERAL_REGISTER_FILE)
+      assert(nr < BRW_MAX_GRF);
+   else if (file == BRW_MESSAGE_REGISTER_FILE)
+      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(nr <= BRW_ARF_TIMESTAMP);
+
+   reg.type = type;
+   reg.file = file;
+   reg.nr = nr;
+   reg.subnr = subnr * type_sz(type);
+   reg.negate = 0;
+   reg.abs = 0;
+   reg.vstride = vstride;
+   reg.width = width;
+   reg.hstride = hstride;
+   reg.address_mode = BRW_ADDRESS_DIRECT;
+   reg.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16.  This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   reg.dw1.bits.swizzle = swizzle;
+   reg.dw1.bits.writemask = writemask;
+   reg.dw1.bits.indirect_offset = 0;
+   reg.dw1.bits.pad1 = 0;
+   return reg;
+}
+
+/** Construct float[16] register */
+static inline struct brw_reg
+brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return brw_reg(file,
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_16,
+                  BRW_WIDTH_16,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[8] register */
+static inline struct brw_reg
+brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return brw_reg(file,
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_8,
+                  BRW_WIDTH_8,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[4] register */
+static inline struct brw_reg
+brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return brw_reg(file,
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_4,
+                  BRW_WIDTH_4,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[2] register */
+static inline struct brw_reg
+brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return brw_reg(file,
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_2,
+                  BRW_WIDTH_2,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYXY,
+                  BRW_WRITEMASK_XY);
+}
+
+/** Construct float[1] register */
+static inline struct brw_reg
+brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return brw_reg(file,
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX,
+                  BRW_WRITEMASK_X);
+}
+
+
+static inline struct brw_reg
+retype(struct brw_reg reg, unsigned type)
+{
+   reg.type = type;
+   return reg;
+}
+
+static inline struct brw_reg
+sechalf(struct brw_reg reg)
+{
+   if (reg.vstride)
+      reg.nr++;
+   return reg;
+}
+
+static inline struct brw_reg
+suboffset(struct brw_reg reg, unsigned delta)
+{
+   reg.subnr += delta * type_sz(reg.type);
+   return reg;
+}
+
+
+static inline struct brw_reg
+offset(struct brw_reg reg, unsigned delta)
+{
+   reg.nr += delta;
+   return reg;
+}
+
+
+static inline struct brw_reg
+byte_offset(struct brw_reg reg, unsigned bytes)
+{
+   unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+   reg.nr = newoffset / REG_SIZE;
+   reg.subnr = newoffset % REG_SIZE;
+   return reg;
+}
+
+
+/** Construct unsigned word[16] register */
+static inline struct brw_reg
+brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[8] register */
+static inline struct brw_reg
+brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[1] register */
+static inline struct brw_reg
+brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static inline struct brw_reg
+brw_imm_reg(unsigned type)
+{
+   return brw_reg(BRW_IMMEDIATE_VALUE,
+                  0,
+                  0,
+                  type,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  0,
+                  0);
+}
+
+/** Construct float immediate register */
+static inline struct brw_reg
+brw_imm_f(float f)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+   imm.dw1.f = f;
+   return imm;
+}
+
+/** Construct integer immediate register */
+static inline struct brw_reg
+brw_imm_d(int d)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+   imm.dw1.d = d;
+   return imm;
+}
+
+/** Construct uint immediate register */
+static inline struct brw_reg
+brw_imm_ud(unsigned ud)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+   imm.dw1.ud = ud;
+   return imm;
+}
+
+/** Construct ushort immediate register */
+static inline struct brw_reg
+brw_imm_uw(uint16_t uw)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   imm.dw1.ud = uw | (uw << 16);
+   return imm;
+}
+
+/** Construct short immediate register */
+static inline struct brw_reg
+brw_imm_w(int16_t w)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   imm.dw1.d = w | (w << 16);
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values */
+static inline struct brw_reg
+brw_imm_v(unsigned v)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+   imm.vstride = BRW_VERTICAL_STRIDE_0;
+   imm.width = BRW_WIDTH_8;
+   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+   imm.dw1.ud = v;
+   return imm;
+}
+
+/** Construct vector of four 8-bit float values */
+static inline struct brw_reg
+brw_imm_vf(unsigned v)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   imm.vstride = BRW_VERTICAL_STRIDE_0;
+   imm.width = BRW_WIDTH_4;
+   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+   imm.dw1.ud = v;
+   return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE  0x30
+#define VF_NEG  (1<<7)
+
+static inline struct brw_reg
+brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   imm.vstride = BRW_VERTICAL_STRIDE_0;
+   imm.width = BRW_WIDTH_4;
+   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+   imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
+   return imm;
+}
+
+
+static inline struct brw_reg
+brw_address(struct brw_reg reg)
+{
+   return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+/** Construct float[1] general-purpose register */
+static inline struct brw_reg
+brw_vec1_grf(unsigned nr, unsigned subnr)
+{
+   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register */
+static inline struct brw_reg
+brw_vec2_grf(unsigned nr, unsigned subnr)
+{
+   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register */
+static inline struct brw_reg
+brw_vec4_grf(unsigned nr, unsigned subnr)
+{
+   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register */
+static inline struct brw_reg
+brw_vec8_grf(unsigned nr, unsigned subnr)
+{
+   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static inline struct brw_reg
+brw_uw8_grf(unsigned nr, unsigned subnr)
+{
+   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static inline struct brw_reg
+brw_uw16_grf(unsigned nr, unsigned subnr)
+{
+   return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
+static inline struct brw_reg
+brw_null_reg(void)
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+}
+
+static inline struct brw_reg
+brw_address_reg(unsigned subnr)
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw.  This goes against the convention for other scalar
+ * regs:
+ */
+static inline struct brw_reg
+brw_ip_reg(void)
+{
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                  BRW_ARF_IP,
+                  0,
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_4, /* ? */
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XYZW, /* NOTE! */
+                  BRW_WRITEMASK_XYZW); /* NOTE! */
+}
+
+static inline struct brw_reg
+brw_acc_reg(void)
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
+}
+
+static inline struct brw_reg
+brw_notification_1_reg(void)
+{
+
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                  BRW_ARF_NOTIFICATION_COUNT,
+                  1,
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX,
+                  BRW_WRITEMASK_X);
+}
+
+
+static inline struct brw_reg
+brw_flag_reg(int reg, int subreg)
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                      BRW_ARF_FLAG + reg, subreg);
+}
+
+
+static inline struct brw_reg
+brw_mask_reg(unsigned subnr)
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
+}
+
+static inline struct brw_reg
+brw_message_reg(unsigned nr)
+{
+   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+}
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static inline unsigned cvt(unsigned val)
+{
+   switch (val) {
+   case 0: return 0;
+   case 1: return 1;
+   case 2: return 2;
+   case 4: return 3;
+   case 8: return 4;
+   case 16: return 5;
+   case 32: return 6;
+   }
+   return 0;
+}
+
+static inline struct brw_reg
+stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
+{
+   reg.vstride = cvt(vstride);
+   reg.width = cvt(width) - 1;
+   reg.hstride = cvt(hstride);
+   return reg;
+}
+
+
+static inline struct brw_reg
+vec16(struct brw_reg reg)
+{
+   return stride(reg, 16,16,1);
+}
+
+static inline struct brw_reg
+vec8(struct brw_reg reg)
+{
+   return stride(reg, 8,8,1);
+}
+
+static inline struct brw_reg
+vec4(struct brw_reg reg)
+{
+   return stride(reg, 4,4,1);
+}
+
+static inline struct brw_reg
+vec2(struct brw_reg reg)
+{
+   return stride(reg, 2,2,1);
+}
+
+static inline struct brw_reg
+vec1(struct brw_reg reg)
+{
+   return stride(reg, 0,1,0);
+}
+
+
+static inline struct brw_reg
+get_element(struct brw_reg reg, unsigned elt)
+{
+   return vec1(suboffset(reg, elt));
+}
+
+static inline struct brw_reg
+get_element_ud(struct brw_reg reg, unsigned elt)
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+static inline struct brw_reg
+get_element_d(struct brw_reg reg, unsigned elt)
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
+}
+
+
+static inline struct brw_reg
+brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
+{
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+   return reg;
+}
+
+
+static inline struct brw_reg
+brw_swizzle1(struct brw_reg reg, unsigned x)
+{
+   return brw_swizzle(reg, x, x, x, x);
+}
+
+static inline struct brw_reg
+brw_writemask(struct brw_reg reg, unsigned mask)
+{
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+   reg.dw1.bits.writemask &= mask;
+   return reg;
+}
+
+static inline struct brw_reg
+brw_set_writemask(struct brw_reg reg, unsigned mask)
+{
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+   reg.dw1.bits.writemask = mask;
+   return reg;
+}
+
+static inline struct brw_reg
+negate(struct brw_reg reg)
+{
+   reg.negate ^= 1;
+   return reg;
+}
+
+static inline struct brw_reg
+brw_abs(struct brw_reg reg)
+{
+   reg.abs = 1;
+   reg.negate = 0;
+   return reg;
+}
+
+/************************************************************************/
+
+static inline struct brw_reg
+brw_vec4_indirect(unsigned subnr, int offset)
+{
+   struct brw_reg reg =  brw_vec4_grf(0, 0);
+   reg.subnr = subnr;
+   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   reg.dw1.bits.indirect_offset = offset;
+   return reg;
+}
+
+static inline struct brw_reg
+brw_vec1_indirect(unsigned subnr, int offset)
+{
+   struct brw_reg reg =  brw_vec1_grf(0, 0);
+   reg.subnr = subnr;
+   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   reg.dw1.bits.indirect_offset = offset;
+   return reg;
+}
+
+static inline struct brw_reg
+deref_4f(struct brw_indirect ptr, int offset)
+{
+   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static inline struct brw_reg
+deref_1f(struct brw_indirect ptr, int offset)
+{
+   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static inline struct brw_reg
+deref_4b(struct brw_indirect ptr, int offset)
+{
+   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static inline struct brw_reg
+deref_1uw(struct brw_indirect ptr, int offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static inline struct brw_reg
+deref_1d(struct brw_indirect ptr, int offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
+}
+
+static inline struct brw_reg
+deref_1ud(struct brw_indirect ptr, int offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static inline struct brw_reg
+get_addr_reg(struct brw_indirect ptr)
+{
+   return brw_address_reg(ptr.addr_subnr);
+}
+
+static inline struct brw_indirect
+brw_indirect_offset(struct brw_indirect ptr, int offset)
+{
+   ptr.addr_offset += offset;
+   return ptr;
+}
+
+static inline struct brw_indirect
+brw_indirect(unsigned addr_subnr, int offset)
+{
+   struct brw_indirect ptr;
+   ptr.addr_subnr = addr_subnr;
+   ptr.addr_offset = offset;
+   ptr.pad = 0;
+   return ptr;
+}
+
+/** Do two brw_regs refer to the same register? */
+static inline bool
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   return r1.file == r2.file && r1.nr == r2.nr;
+}
+
+void brw_print_reg(struct brw_reg reg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/assembler/disasm-main.c b/assembler/disasm-main.c
index 5cc1e7d..b900e91 100644
--- a/assembler/disasm-main.c
+++ b/assembler/disasm-main.c
@@ -27,6 +27,7 @@ 
 #include <unistd.h>
 
 #include "gen4asm.h"
+#include "brw_eu.h"
 
 static const struct option longopts[] = {
 	{ NULL, 0, NULL, 0 }
@@ -95,7 +96,10 @@  read_program_binary (FILE *input)
 
 static void usage(void)
 {
-    fprintf(stderr, "usage: intel-gen4disasm [-o outputfile] [-b] inputfile\n");
+    fprintf(stderr, "usage: intel-gen4disasm [options] inputfile\n");
+    fprintf(stderr, "\t-b, --binary                         C style binary output\n");
+    fprintf(stderr, "\t-o, --output {outputfile}            Specify output file\n");
+    fprintf(stderr, "\t-g, --gen <4|5|6|7>                  Specify GPU generation\n");
 }
 
 int main(int argc, char **argv)
@@ -107,9 +111,10 @@  int main(int argc, char **argv)
     char		*output_file = NULL;
     int			byte_array_input = 0;
     int			o;
+    int			gen = 4;
     struct brw_program_instruction  *inst;
 
-    while ((o = getopt_long(argc, argv, "o:b", longopts, NULL)) != -1) {
+    while ((o = getopt_long(argc, argv, "o:bg:", longopts, NULL)) != -1) {
 	switch (o) {
 	case 'o':
 	    if (strcmp(optarg, "-") != 0)
@@ -118,6 +123,15 @@  int main(int argc, char **argv)
 	case 'b':
 	    byte_array_input = 1;
 	    break;
+	case 'g':
+	    gen = strtol(optarg, NULL, 10);
+
+	    if (gen < 4 || gen > 7) {
+		    usage();
+		    exit(1);
+	    }
+
+	    break;
 	default:
 	    usage();
 	    exit(1);
@@ -153,6 +167,6 @@  int main(int argc, char **argv)
     }
 	    
     for (inst = program->first; inst; inst = inst->next)
-	disasm (output, &inst->instruction);
+	brw_disasm (output, &inst->instruction, gen);
     exit (0);
 }
diff --git a/assembler/gen4asm.h b/assembler/gen4asm.h
index f9ed161..e47e9e6 100644
--- a/assembler/gen4asm.h
+++ b/assembler/gen4asm.h
@@ -197,6 +197,3 @@  int yylex_destroy(void);
 
 char *
 lex_text(void);
-
-int
-disasm (FILE *output, struct brw_instruction *inst);