diff mbox

mesa/intel: support tiled textures on pre-965

Message ID 20090915112935.787d64c1@jbarnes-g45 (mailing list archive)
State Not Applicable
Headers show

Commit Message

Jesse Barnes Sept. 15, 2009, 6:29 p.m. UTC
On Sun, 09 Aug 2009 00:15:29 -0700
Eric Anholt <eric@anholt.net> wrote:

> On Tue, 2009-07-14 at 13:55 -0700, Jesse Barnes wrote:
> > This patch adds support for tiled textures on pre-965 chips.  It
> > uses the new libdrm tiled allocation function and the new fence
> > register relocation type to specify buffer properties and rendering
> > requirements.
> > 
> > This one currently causes some rendering errors, but is otherwise
> > stable.  Feedback appreciated.  I also noticed one other place we
> > might use tiled objects, intel_bufferobj_alloc_buffer(), but didn't
> > make the conversion here.
> > 
> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Updated with your feedback.
diff mbox

Patch

diff --git a/src/mesa/drivers/dri/i915/i915_metaops.c b/src/mesa/drivers/dri/i915/i915_metaops.c
index 90a78c6..dae292f 100644
--- a/src/mesa/drivers/dri/i915/i915_metaops.c
+++ b/src/mesa/drivers/dri/i915/i915_metaops.c
@@ -334,6 +334,7 @@  meta_tex_rect_source(struct intel_context *intel,
    GLuint *state = i915->meta.Tex[0];
    GLuint textureFormat;
    GLuint cpp;
+   uint32_t tiling, swizzle;
 
    /* A full implementation of this would do the upload through
     * glTexImage2d, and get all the conversion operations at that
@@ -398,10 +399,16 @@  meta_tex_rect_source(struct intel_context *intel,
 /*    intel_region_reference(&i915->meta.tex_region[0], region); */
    i915->meta.tex_buffer[0] = buffer;
    i915->meta.tex_offset[0] = offset;
+   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle);
 
    state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
                              ((pitch - 1) << MS3_WIDTH_SHIFT) |
-                             textureFormat | MS3_USE_FENCE_REGS);
+                             textureFormat);
+   if (tiling != I915_TILING_NONE) {
+      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
+      if (tiling == I915_TILING_Y)
+	      state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
+   }
 
    state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
                              MS4_CUBE_FACE_ENA_MASK |
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 6aa36d1..8fc671f 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -294,6 +294,55 @@  intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
    return GL_TRUE;
 }
 
+/**
+ * Emit a fenced relocation for \p buffer at the current batch position.
+ *
+ * The relocation is recorded with drm_intel_bo_emit_reloc_fence() at offset
+ * (batch->ptr - batch->map) in batch->buf, then one dword holding the
+ * presumed address (buffer->offset + delta) is written to the batch.
+ *
+ * \param batch         batchbuffer being filled
+ * \param buffer        buffer object the relocation points at
+ * \param read_domains  read domains passed through to libdrm
+ * \param write_domain  write domain passed through to libdrm
+ * \param delta         offset added to the buffer's address
+ *
+ * \return GL_TRUE if the relocation was recorded, GL_FALSE if libdrm
+ *         reported an error.  The dword is emitted either way so that the
+ *         length of the command packet being built does not change.
+ */
+GLboolean
+intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+				    dri_bo *buffer,
+				    uint32_t read_domains,
+				    uint32_t write_domain,
+				    uint32_t delta)
+{
+   int ret;
+
+   /* Diagnostic only: warn when the write position is past the buffer. */
+   if (batch->ptr - batch->map > batch->buf->size)
+      _mesa_printf("bad relocation ptr %p map %p offset %ld size %lu\n",
+		   (void *) batch->ptr, (void *) batch->map,
+		   (long) (batch->ptr - batch->map),
+		   (unsigned long) batch->buf->size);
+
+   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+				       buffer, delta, read_domains,
+				       write_domain);
+   if (ret != 0)
+      _mesa_printf("fenced relocation failed: %d\n", ret);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   intel_batchbuffer_emit_dword(batch, buffer->offset + delta);
+
+   return ret == 0 ? GL_TRUE : GL_FALSE;
+}
+
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
                        const void *data, GLuint bytes,
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 51579df..baf5c05 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -95,6 +95,11 @@  GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 				       uint32_t read_domains,
 				       uint32_t write_domain,
 				       uint32_t offset);
+GLboolean intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+					      dri_bo *buffer,
+					      uint32_t read_domains,
+					      uint32_t write_domain,
+					      uint32_t offset);
 
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
@@ -162,6 +167,13 @@  intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 				read_domains, write_domain, delta);	\
 } while (0)
 
+#define OUT_RELOC_FENCE(buf, read_domains, write_domain, delta) do {	\
+   assert((delta) >= 0);						\
+   intel_batchbuffer_emit_fenced_reloc(intel->batch, buf,		\
+				       read_domains, write_domain,	\
+				       delta);				\
+} while (0)
+
 #define ADVANCE_BATCH() do {						\
    unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
    assert(intel->batch->emit.start_ptr != NULL);			\
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 43141c5..6a326a5 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -153,14 +153,20 @@  intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);
 
-	 OUT_RELOC(dst->buffer,
-		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		   0);
+	 if (dst->tiling != I915_TILING_NONE)
+		 OUT_RELOC_FENCE(dst->buffer,
+				 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+				 0);
+	 else
+		 OUT_RELOC(dst->buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
 	 OUT_BATCH((src_y << 16) | src_x);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(src->buffer,
-		   I915_GEM_DOMAIN_RENDER, 0,
-		   0);
+	 if (src->tiling != I915_TILING_NONE)
+		 OUT_RELOC_FENCE(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
+	 else
+		 OUT_RELOC(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
 	 ADVANCE_BATCH();
       }
 
@@ -318,14 +324,23 @@  intelEmitCopyBlit(struct intel_context *intel,
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   if (dst_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(dst_buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+			   dst_offset);
+   else
+	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		     dst_offset);
+
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
-   OUT_RELOC(src_buffer,
-	     I915_GEM_DOMAIN_RENDER, 0,
-	     src_offset);
+   if (src_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(src_buffer,
+			   I915_GEM_DOMAIN_RENDER, 0,
+			   src_offset);
+   else
+	   OUT_RELOC(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset);
+
    ADVANCE_BATCH();
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -531,9 +546,16 @@  intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
                OUT_BATCH(BR13);
                OUT_BATCH((b.y1 << 16) | b.x1);
                OUT_BATCH((b.y2 << 16) | b.x2);
-               OUT_RELOC(write_buffer,
-			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                         irb->region->draw_offset);
+ 	       if (irb->region->tiling != I915_TILING_NONE)
+		       OUT_RELOC_FENCE(write_buffer,
+				       I915_GEM_DOMAIN_RENDER,
+				       I915_GEM_DOMAIN_RENDER,
+				       irb->region->draw_offset);
+	       else
+		       OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER,
+				 I915_GEM_DOMAIN_RENDER,
+				 irb->region->draw_offset);
+
                OUT_BATCH(clearVal);
                ADVANCE_BATCH();
                clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
@@ -612,9 +634,14 @@  intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   if (dst_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(dst_buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+			   dst_offset);
+   else
+	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		     dst_offset);
+
    OUT_BATCH(0); /* bg */
    OUT_BATCH(fg_color); /* fg */
    OUT_BATCH(0); /* pattern base addr */
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 804c034..f9ccaba 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -37,6 +37,7 @@ 
 #include "main/texrender.h"
 #include "drivers/common/meta.h"
 
+#include "intel_chipset.h"
 #include "intel_context.h"
 #include "intel_buffers.h"
 #include "intel_fbo.h"
@@ -106,6 +107,7 @@  intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   uint32_t tiling_mode = I915_TILING_NONE;
    GLboolean softwareBuffer = GL_FALSE;
    int cpp;
 
@@ -218,9 +220,19 @@  intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
 	  height, pitch);
 
-      irb->region = intel_region_alloc(intel, I915_TILING_NONE,
-				       cpp, width, height, pitch,
-				       GL_TRUE);
+      if (intel->use_texture_tiling &&
+	  intel->intelScreen->kernel_exec_fencing) {
+	 if (IS_965(intel->intelScreen->deviceID) &&
+	     (internalFormat == GL_DEPTH_COMPONENT ||
+	      internalFormat == GL_DEPTH_STENCIL_EXT))
+	    tiling_mode = I915_TILING_Y;
+	 else
+	    tiling_mode = I915_TILING_X;
+      } else
+	 tiling_mode = I915_TILING_NONE;
+
+      irb->region = intel_region_alloc(intel, tiling_mode, cpp, width, height,
+				       pitch, GL_TRUE);
       if (!irb->region)
          return GL_FALSE;       /* out of memory? */
 
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index c985da5..83b6514 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -161,12 +161,24 @@  intel_miptree_create_for_region(struct intel_context *intel,
 				GLuint compress_byte)
 {
    struct intel_mipmap_tree *mt;
+   uint32_t tiling = I915_TILING_NONE;
+
+   if (intel->use_texture_tiling && compress_byte == 0 &&
+       intel->intelScreen->kernel_exec_fencing) {
+      if (IS_965(intel->intelScreen->deviceID) &&
+	  (internal_format == GL_DEPTH_COMPONENT ||
+	   internal_format == GL_DEPTH_STENCIL_EXT))
+	 tiling = I915_TILING_Y;
+      else
+	 tiling = I915_TILING_X;
+   } else
+      tiling = I915_TILING_NONE;
 
    mt = intel_miptree_create_internal(intel, target, internal_format,
 				      first_level, last_level,
 				      region->width, region->height, 1,
 				      region->cpp, compress_byte,
-				      I915_TILING_NONE);
+				      tiling);
    if (!mt)
       return mt;
 #if 0
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index a86c66a..8ced450 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -105,7 +105,6 @@  debug_backtrace(void)
 #endif
 
 
-
 /* XXX: Thread safety?
  */
 GLubyte *
@@ -180,6 +179,8 @@  intel_region_alloc(struct intel_context *intel,
 {
    dri_bo *buffer;
    struct intel_region *region;
+   unsigned long flags = 0, stride; /* flags only set for accelerated upload */
+   int ret;
 
    /* If we're tiled, our allocations are in 8 or 32-row blocks, so
     * failure to align our height means that we won't allocate enough pages.
@@ -203,21 +204,22 @@  intel_region_alloc(struct intel_context *intel,
     */
    height = ALIGN(height, 2);
 
-   if (expect_accelerated_upload) {
-      buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
-					     pitch * cpp * height, 64);
-   } else {
-      buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
-				  pitch * cpp * height, 64);
-   }
+   if (expect_accelerated_upload)
+      flags = BO_ALLOC_FOR_RENDER;
 
-   region = intel_region_alloc_internal(intel, cpp, width, height,
-					pitch, buffer);
+   buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height,
+				     cpp, &tiling, &stride, flags);
+   if (!buffer)
+      return NULL;
 
-   if (tiling != I915_TILING_NONE) {
-      assert(((pitch * cpp) & 127) == 0);
-      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
-      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+   pitch = stride / cpp;
+   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+					buffer);
+   ret = drm_intel_bo_get_tiling(buffer, &region->tiling,
+				 &region->bit_6_swizzle);
+   if (ret != 0) {
+      intel_region_release(&region);
+      return NULL;
    }
 
    return region;
@@ -235,8 +237,8 @@  intel_region_alloc_for_handle(struct intel_context *intel,
 
    buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
 
-   region = intel_region_alloc_internal(intel, cpp,
-					width, height, pitch, buffer);
+   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+					buffer);
    if (region == NULL)
       return region;
 
@@ -455,6 +457,9 @@  void
 intel_region_release_pbo(struct intel_context *intel,
                          struct intel_region *region)
 {
+   uint32_t tiling = I915_TILING_X;
+   unsigned long stride;
+
    _DBG("%s %p\n", __FUNCTION__, region);
    assert(region->buffer == region->pbo->buffer);
    region->pbo->region = NULL;
@@ -462,9 +467,11 @@  intel_region_release_pbo(struct intel_context *intel,
    dri_bo_unreference(region->buffer);
    region->buffer = NULL;
 
-   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
-				 region->pitch * region->cpp * region->height,
-				 64);
+   region->buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+					     region->width, region->height,
+					     region->cpp, &tiling, &stride, 0);
+   dri_bo_get_tiling(region->buffer, &region->tiling, &region->bit_6_swizzle);
+   region->pitch = stride / region->cpp;
 }
 
 /* Break the COW tie to the pbo.  Both the pbo and the region end up
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 1b8c56e..0b55dd3 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -602,7 +602,7 @@  intel_init_bufmgr(intelScreenPrivate *intelScreen)
    GLboolean gem_supported;
    struct drm_i915_getparam gp;
    __DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
-   int num_fences = 0;
+   int num_fences = 0, has_exec2 = 0;
 
    intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
@@ -658,6 +658,10 @@  intel_init_bufmgr(intelScreenPrivate *intelScreen)
    else
       intelScreen->kernel_exec_fencing = GL_FALSE;
 
+   if (intel_get_param(spriv, I915_PARAM_HAS_EXECBUF2, &has_exec2) &&
+       has_exec2)
+      drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+
    return GL_TRUE;
 }