diff mbox

Big endian support for RV730 (Gallium r600g)

Message ID 4DA87058.9020609@ic.fr (mailing list archive)
State New, archived
Headers show

Commit Message

Cédric Cano April 15, 2011, 4:20 p.m. UTC
Hi

Here is a patch that adds big-endian support for the RV730 to the r600 
Gallium driver.

I used mesa-demos to test the state of the driver on a big-endian platform. 
Except for the demos that use the accumulation buffer, the rendering is the 
same as on an Intel platform, although some demos still show a few 
artefacts.

I managed to fix the accumulation buffer demos, but then the glReadPixels 
demos stop working. I still need to figure out (as with r600c) when, and for 
what, swapping must be enabled. It will depend on the object's domain. That 
is what I tried to do in r600_cb and r600_create_sampler_view.

A review of the patch would be greatly appreciated.

Regards,
Cedric
diff mbox

Patch

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index c22bd8e..7e854b1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -22,6 +22,7 @@ 
  */
 #include <stdio.h>
 #include <errno.h>
+#include <byteswap.h>
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
@@ -32,6 +33,12 @@ 
 #include "r600_formats.h"
 #include "r600d.h"
 
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define CPU_TO_LE32(x)	bswap_32(x)
+#else
+#define CPU_TO_LE32(x)	(x)
+#endif
+
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
@@ -1383,6 +1390,7 @@  static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
 				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
 				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
 	bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
+	   			S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) |
 				S_SQ_VTX_WORD2_MEGA_FETCH(1);
 	bc->bytecode[id++] = 0;
 	return 0;
@@ -1917,6 +1925,7 @@  void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
 			id++;
 			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
+			fprintf(stderr, "ENDIAN:%d ", vtx->endian);
 			fprintf(stderr, "OFFSET:%d\n", vtx->offset);
 			//TODO
 			id++;
@@ -1929,7 +1938,7 @@  void r600_bc_dump(struct r600_bc *bc)
 }
 
 static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
-				unsigned *num_format, unsigned *format_comp)
+				unsigned *num_format, unsigned *format_comp, unsigned *endian)
 {
 	const struct util_format_description *desc;
 	unsigned i;
@@ -1937,6 +1946,7 @@  static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 	*format = 0;
 	*num_format = 0;
 	*format_comp = 0;
+	*endian = ENDIAN_NONE;
 
 	desc = util_format_description(pformat);
 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
@@ -1967,6 +1977,9 @@  static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 				*format = FMT_16_16_16_16_FLOAT;
 				break;
 			}
+#ifdef PIPE_ARCH_BIG_ENDIAN
+			*endian = ENDIAN_8IN16;
+#endif
 			break;
 		case 32:
 			switch (desc->nr_channels) {
@@ -1983,6 +1996,9 @@  static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 				*format = FMT_32_32_32_32_FLOAT;
 				break;
 			}
+#ifdef PIPE_ARCH_BIG_ENDIAN
+			*endian = ENDIAN_8IN32;
+#endif
 			break;
 		default:
 			goto out_unknown;
@@ -2020,6 +2036,9 @@  static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 				*format = FMT_16_16_16_16;
 				break;
 			}
+#ifdef PIPE_ARCH_BIG_ENDIAN
+			*endian = ENDIAN_8IN16;
+#endif
 			break;
 		case 32:
 			switch (desc->nr_channels) {
@@ -2036,6 +2055,9 @@  static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 				*format = FMT_32_32_32_32;
 				break;
 			}
+#ifdef PIPE_ARCH_BIG_ENDIAN
+			*endian = ENDIAN_8IN32;
+#endif
 			break;
 		default:
 			goto out_unknown;
@@ -2067,7 +2089,7 @@  int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	struct pipe_vertex_element *elements = ve->elements;
 	const struct util_format_description *desc;
 	unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
-	unsigned format, num_format, format_comp;
+	unsigned format, num_format, format_comp, endian;
 	u32 *bytecode;
 	int i, r;
 
@@ -2114,7 +2136,7 @@  int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 
 	for (i = 0; i < ve->count; i++) {
 		unsigned vbuffer_index;
-		r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
+		r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian);
 		desc = util_format_description(ve->elements[i].src_format);
 		if (desc == NULL) {
 			r600_bc_clear(&bc);
@@ -2140,6 +2162,7 @@  int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		vtx.format_comp_all = format_comp;
 		vtx.srf_mode_all = 1;
 		vtx.offset = elements[i].src_offset;
+		vtx.endian = endian;
 
 		if ((r = r600_bc_add_vtx(&bc, &vtx))) {
 			r600_bc_clear(&bc);
@@ -2179,7 +2202,9 @@  int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		return -ENOMEM;
 	}
 
-	memcpy(bytecode, bc.bytecode, ve->fs_size);
+	for(i = 0; i < ve->fs_size / 4; i++) {
+		*(bytecode + i) = CPU_TO_LE32(*(bc.bytecode + i));
+	}
 
 	r600_bo_unmap(rctx->radeon, ve->fetch_shader);
 	r600_bc_clear(&bc);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 27ea293..26d337f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -104,6 +104,7 @@  struct r600_bc_vtx {
 	unsigned			format_comp_all;
 	unsigned			srf_mode_all;
 	unsigned			offset;
+	unsigned			endian;
 };
 
 struct r600_bc_output {
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 6ced719..c1f063f 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -24,6 +24,8 @@ 
  *      Jerome Glisse
  *      Corbin Simpson <MostAwesomeDude@gmail.com>
  */
+#include <byteswap.h>
+
 #include <pipe/p_screen.h>
 #include <util/u_format.h>
 #include <util/u_math.h>
@@ -266,11 +268,31 @@  void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resour
 		uint8_t *ptr = (*rbuffer)->r.b.user_ptr;
 		unsigned size = (*rbuffer)->r.b.b.b.width0;
 		boolean flushed;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+		int i;
+		uint32_t *tmpPtr;
+
+		*rbuffer = NULL;
+
+		tmpPtr = (uint32_t *)malloc(size);
+		/* big endian swap */
+		if(tmpPtr == NULL) {
+			return;
+		}
+		for(i = 0; i < size / 4; i++) {
+			tmpPtr[i] = bswap_32(*((uint32_t *)ptr + i));
+		}
+	
+		u_upload_data(rctx->vbuf_mgr->uploader, 0, size, tmpPtr, const_offset,
+			      (struct pipe_resource**)rbuffer, &flushed);
 
+		free(tmpPtr);
+#else
 		*rbuffer = NULL;
 
 		u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset,
 			      (struct pipe_resource**)rbuffer, &flushed);
+#endif
 	} else {
 		*const_offset = 0;
 	}
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 6eac1f7..188cea0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -33,6 +33,13 @@ 
 #include "r600d.h"
 #include <stdio.h>
 #include <errno.h>
+#include <byteswap.h>
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define CPU_TO_LE32(x)	bswap_32(x)
+#else
+#define CPU_TO_LE32(x)	(x)
+#endif
 
 int r600_find_vs_semantic_index(struct r600_shader *vs,
 				struct r600_shader *ps, int id)
@@ -52,7 +59,8 @@  static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_shader *rshader = &shader->shader;
-	void *ptr;
+	uint32_t *ptr;
+	int	i;
 
 	/* copy new shader */
 	if (shader->bo == NULL) {
@@ -60,8 +68,10 @@  static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
-		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
-		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
+		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
+		for(i = 0; i < rshader->bc.ndw; i++) {
+			*(ptr + i) = CPU_TO_LE32(*(rshader->bc.bytecode + i));
+		}
 		r600_bo_unmap(rctx->radeon, shader->bo);
 	}
 	/* build state */
@@ -467,6 +477,11 @@  static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
 	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
 	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
 	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+	vtx.endian = ENDIAN_8IN32;
+#else
+	vtx.endian = ENDIAN_NONE;
+#endif
 
 	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
 		return r;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3a863ae..93ef536 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -413,7 +413,7 @@  static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	const struct util_format_description *desc;
 	struct r600_resource_texture *tmp;
 	struct r600_resource *rbuffer;
-	unsigned format;
+	unsigned format, endian;
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
 	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
 	struct r600_bo *bo[2];
@@ -450,6 +450,7 @@  static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	        r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
 	}
+	endian = r600_colorformat_endian_swap(format);
 
 	if (tmp->force_int_type) {
 		word4 &= C_038010_NUM_FORMAT_ALL;
@@ -490,6 +491,7 @@  static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 				word4 |
 				S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
 				S_038010_REQUEST_SIZE(1) |
+				S_038010_ENDIAN_SWAP(endian) |
 				S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
 				S_038014_LAST_LEVEL(state->u.tex.last_level) |
@@ -718,7 +720,7 @@  static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	unsigned level = state->cbufs[cb]->u.tex.level;
 	unsigned pitch, slice;
 	unsigned color_info;
-	unsigned format, swap, ntype;
+	unsigned format, swap, ntype, endian;
 	unsigned offset;
 	const struct util_format_description *desc;
 	struct r600_bo *bo[3];
@@ -755,6 +757,11 @@  static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 
 	format = r600_translate_colorformat(surf->base.format);
 	swap = r600_translate_colorswap(surf->base.format);
+	if(rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) {
+		endian = ENDIAN_NONE;
+	} else {
+		endian = r600_colorformat_endian_swap(format);
+	}
 
 	/* disable when gallium grows int textures */
 	if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
@@ -764,7 +771,8 @@  static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 		S_0280A0_COMP_SWAP(swap) |
 		S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
 		S_0280A0_BLEND_CLAMP(1) |
-		S_0280A0_NUMBER_TYPE(ntype);
+		S_0280A0_NUMBER_TYPE(ntype) |
+		S_0280A0_ENDIAN(endian);
 
 	/* on R600 this can't be set if BLEND_CLAMP isn't set,
 	   if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */
@@ -1443,8 +1451,10 @@  void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
 	r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
 				rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
-				S_038008_STRIDE(stride),
-				0xFFFFFFFF, NULL);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+				S_038008_ENDIAN_SWAP(ENDIAN_8IN32) |
+#endif
+				S_038008_STRIDE(stride), 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
 				0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 43dad0c..997c9a5 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -435,7 +435,7 @@  void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_resource *rbuffer;
-	u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+	u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask;
 	struct r600_draw rdraw;
 	struct r600_pipe_state vgt;
 	struct r600_drawl draw = {};
@@ -467,14 +467,21 @@  void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		draw.info.index_bias = info->start;
 	}
 
+	vgt_dma_swap_mode = 0;
 	switch (draw.index_size) {
 	case 2:
 		vgt_draw_initiator = 0;
 		vgt_dma_index_type = 0;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+		vgt_dma_swap_mode = ENDIAN_8IN16;
+#endif
 		break;
 	case 4:
 		vgt_draw_initiator = 0;
 		vgt_dma_index_type = 1;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+		vgt_dma_swap_mode = ENDIAN_8IN32;
+#endif
 		break;
 	case 0:
 		vgt_draw_initiator = 2;
@@ -521,7 +528,7 @@  void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	rdraw.vgt_num_indices = draw.info.count;
 	rdraw.vgt_num_instances = draw.info.instance_count;
-	rdraw.vgt_index_type = vgt_dma_index_type;
+	rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2);
 	rdraw.vgt_draw_initiator = vgt_draw_initiator;
 	rdraw.indices = NULL;
 	if (draw.index_buffer) {
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 3d03604..10aa099 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -498,6 +498,57 @@  static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	}
 }
 
+static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+	switch(colorformat) {
+	case V_0280A0_COLOR_4_4:
+		return(ENDIAN_NONE);
+
+		/* 8-bit buffers: no swap is requested. */
+	case V_0280A0_COLOR_8:
+		return(ENDIAN_NONE);
+
+		/* 16-bit buffers: every listed format maps to ENDIAN_8IN16. */
+	case V_0280A0_COLOR_5_6_5:
+	case V_0280A0_COLOR_1_5_5_5:
+	case V_0280A0_COLOR_4_4_4_4:
+	case V_0280A0_COLOR_16:
+	case V_0280A0_COLOR_8_8:
+		return(ENDIAN_8IN16);
+
+		/* 32-bit buffers: every listed format maps to ENDIAN_8IN32. */
+	case V_0280A0_COLOR_8_8_8_8:
+	case V_0280A0_COLOR_2_10_10_10:
+	case V_0280A0_COLOR_8_24:
+	case V_0280A0_COLOR_24_8:
+	case V_0280A0_COLOR_32_FLOAT:
+	case V_0280A0_COLOR_16_16_FLOAT:
+	case V_0280A0_COLOR_16_16:
+		return(ENDIAN_8IN32);
+
+		/* 64-bit buffers: the mode follows the channel width (16 or 32 bits), not the total pixel size. */
+	case V_0280A0_COLOR_16_16_16_16:
+	case V_0280A0_COLOR_16_16_16_16_FLOAT:
+		return(ENDIAN_8IN16);
+
+	case V_0280A0_COLOR_32_32_FLOAT:
+	case V_0280A0_COLOR_32_32:
+		return(ENDIAN_8IN32);
+
+		/* 128-bit buffers: every listed format maps to ENDIAN_8IN32. */
+	case V_0280A0_COLOR_32_32_32_FLOAT:
+	case V_0280A0_COLOR_32_32_32_32_FLOAT:
+	case V_0280A0_COLOR_32_32_32_32:
+		return(ENDIAN_8IN32);
+	default:
+		return ENDIAN_NONE; /* Unsupported or unlisted format: no swap is requested. */
+	}
+#else
+	return ENDIAN_NONE;
+#endif
+}
+
 static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
 {
 	return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index df70e28..2bff52b 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3460,4 +3460,10 @@ 
 #define SQ_TEX_INST_SAMPLE 0x10
 #define SQ_TEX_INST_SAMPLE_L 0x11
 #define SQ_TEX_INST_SAMPLE_C 0x18
+
+#define ENDIAN_NONE		0
+#define ENDIAN_8IN16	1
+#define ENDIAN_8IN32	2
+#define ENDIAN_8IN64	3
+
 #endif