@@ -334,6 +334,7 @@ meta_tex_rect_source(struct intel_context *intel,
GLuint *state = i915->meta.Tex[0];
GLuint textureFormat;
GLuint cpp;
+ uint32_t tiling, swizzle;
/* A full implementation of this would do the upload through
* glTexImage2d, and get all the conversion operations at that
@@ -398,10 +399,16 @@ meta_tex_rect_source(struct intel_context *intel,
/* intel_region_reference(&i915->meta.tex_region[0], region); */
i915->meta.tex_buffer[0] = buffer;
i915->meta.tex_offset[0] = offset;
+ drm_intel_bo_get_tiling(buffer, &tiling, &swizzle);
state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
((pitch - 1) << MS3_WIDTH_SHIFT) |
- textureFormat | MS3_USE_FENCE_REGS);
+ textureFormat);
+ if (tiling != I915_TILING_NONE) {
+ state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
+ if (tiling == I915_TILING_Y)
+ state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
+ }
state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
MS4_CUBE_FACE_ENA_MASK |
@@ -294,6 +294,32 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
return GL_TRUE;
}
+GLboolean
+intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+ dri_bo *buffer,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t delta)
+{
+ int ret;
+
+ if (batch->ptr - batch->map > batch->buf->size)
+ _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+ batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+ ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+ buffer, delta, read_domains,
+ write_domain);
+
+ /*
+ * Using the old buffer offset, write in what the right data would be, in case
+ * the buffer doesn't move and we can short-circuit the relocation processing
+ * in the kernel
+ */
+ intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+ return GL_TRUE;
+}
+
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
const void *data, GLuint bytes,
@@ -95,6 +95,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
+GLboolean intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+ dri_bo *buffer,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t offset);
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
@@ -162,6 +167,13 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
read_domains, write_domain, delta); \
} while (0)
+#define OUT_RELOC_FENCE(buf, read_domains, write_domain, delta) do { \
+ assert((delta) >= 0); \
+ intel_batchbuffer_emit_fenced_reloc(intel->batch, buf, \
+ read_domains, write_domain, \
+ delta); \
+} while (0)
+
#define ADVANCE_BATCH() do { \
unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \
assert(intel->batch->emit.start_ptr != NULL); \
@@ -153,14 +153,20 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
OUT_BATCH((box.y1 << 16) | box.x1);
OUT_BATCH((box.y2 << 16) | box.x2);
- OUT_RELOC(dst->buffer,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- 0);
+ if (dst->tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(dst->buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ else
+ OUT_RELOC(dst->buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(src_pitch);
- OUT_RELOC(src->buffer,
- I915_GEM_DOMAIN_RENDER, 0,
- 0);
+ if (src->tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
+ else
+ OUT_RELOC(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
}
@@ -318,14 +324,23 @@ intelEmitCopyBlit(struct intel_context *intel,
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
OUT_BATCH((dst_y << 16) | dst_x);
OUT_BATCH((dst_y2 << 16) | dst_x2);
- OUT_RELOC(dst_buffer,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- dst_offset);
+ if (dst_tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(dst_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
+ else
+ OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
+
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH((uint16_t)src_pitch);
- OUT_RELOC(src_buffer,
- I915_GEM_DOMAIN_RENDER, 0,
- src_offset);
+ if (src_tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(src_buffer,
+ I915_GEM_DOMAIN_RENDER, 0,
+ src_offset);
+ else
+ OUT_RELOC(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset);
+
ADVANCE_BATCH();
intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -531,9 +546,16 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
OUT_BATCH(BR13);
OUT_BATCH((b.y1 << 16) | b.x1);
OUT_BATCH((b.y2 << 16) | b.x2);
- OUT_RELOC(write_buffer,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- irb->region->draw_offset);
+ if (irb->region->tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(write_buffer,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ irb->region->draw_offset);
+ else
+ OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ irb->region->draw_offset);
+
OUT_BATCH(clearVal);
ADVANCE_BATCH();
clearMask &= ~bufBit; /* turn off bit, for faster loop exit */
@@ -612,9 +634,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
- OUT_RELOC(dst_buffer,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- dst_offset);
+ if (dst_tiling != I915_TILING_NONE)
+ OUT_RELOC_FENCE(dst_buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
+ else
+ OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ dst_offset);
+
OUT_BATCH(0); /* bg */
OUT_BATCH(fg_color); /* fg */
OUT_BATCH(0); /* pattern base addr */
@@ -37,6 +37,7 @@
#include "main/texrender.h"
#include "drivers/common/meta.h"
+#include "intel_chipset.h"
#include "intel_context.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
@@ -106,6 +107,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ uint32_t tiling_mode = I915_TILING_NONE;
GLboolean softwareBuffer = GL_FALSE;
int cpp;
@@ -218,9 +220,19 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
height, pitch);
- irb->region = intel_region_alloc(intel, I915_TILING_NONE,
- cpp, width, height, pitch,
- GL_TRUE);
+ if (intel->use_texture_tiling &&
+ intel->intelScreen->kernel_exec_fencing) {
+ if (IS_965(intel->intelScreen->deviceID) &&
+ (internalFormat == GL_DEPTH_COMPONENT ||
+ internalFormat == GL_DEPTH_STENCIL_EXT))
+ tiling_mode = I915_TILING_Y;
+ else
+ tiling_mode = I915_TILING_X;
+ } else
+ tiling_mode = I915_TILING_NONE;
+
+ irb->region = intel_region_alloc(intel, tiling_mode, cpp, width, height,
+ pitch, GL_TRUE);
if (!irb->region)
return GL_FALSE; /* out of memory? */
@@ -161,12 +161,24 @@ intel_miptree_create_for_region(struct intel_context *intel,
GLuint compress_byte)
{
struct intel_mipmap_tree *mt;
+ uint32_t tiling = I915_TILING_NONE;
+
+ if (intel->use_texture_tiling && compress_byte == 0 &&
+ intel->intelScreen->kernel_exec_fencing) {
+ if (IS_965(intel->intelScreen->deviceID) &&
+ (internal_format == GL_DEPTH_COMPONENT ||
+ internal_format == GL_DEPTH_STENCIL_EXT))
+ tiling = I915_TILING_Y;
+ else
+ tiling = I915_TILING_X;
+ } else
+ tiling = I915_TILING_NONE;
mt = intel_miptree_create_internal(intel, target, internal_format,
first_level, last_level,
region->width, region->height, 1,
region->cpp, compress_byte,
- I915_TILING_NONE);
+ tiling);
if (!mt)
return mt;
#if 0
@@ -105,7 +105,6 @@ debug_backtrace(void)
#endif
-
/* XXX: Thread safety?
*/
GLubyte *
@@ -180,6 +179,8 @@ intel_region_alloc(struct intel_context *intel,
{
dri_bo *buffer;
struct intel_region *region;
+ unsigned long flags = 0, stride;
+ int ret;
/* If we're tiled, our allocations are in 8 or 32-row blocks, so
* failure to align our height means that we won't allocate enough pages.
@@ -203,21 +204,22 @@ intel_region_alloc(struct intel_context *intel,
*/
height = ALIGN(height, 2);
- if (expect_accelerated_upload) {
- buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
- pitch * cpp * height, 64);
- } else {
- buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
- pitch * cpp * height, 64);
- }
+ if (expect_accelerated_upload)
+ flags = BO_ALLOC_FOR_RENDER;
- region = intel_region_alloc_internal(intel, cpp, width, height,
- pitch, buffer);
+ buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height,
+ cpp, &tiling, &stride, flags);
+ if (!buffer)
+ return NULL;
- if (tiling != I915_TILING_NONE) {
- assert(((pitch * cpp) & 127) == 0);
- drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
- drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+ pitch = stride / cpp;
+ region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+ buffer);
+ ret = drm_intel_bo_get_tiling(buffer, &region->tiling,
+ &region->bit_6_swizzle);
+ if (ret != 0) {
+ intel_region_release(&region);
+ return NULL;
}
return region;
@@ -235,8 +237,8 @@ intel_region_alloc_for_handle(struct intel_context *intel,
buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
- region = intel_region_alloc_internal(intel, cpp,
- width, height, pitch, buffer);
+ region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+ buffer);
if (region == NULL)
return region;
@@ -455,6 +457,9 @@ void
intel_region_release_pbo(struct intel_context *intel,
struct intel_region *region)
{
+ uint32_t tiling = I915_TILING_X;
+ unsigned long stride;
+
_DBG("%s %p\n", __FUNCTION__, region);
assert(region->buffer == region->pbo->buffer);
region->pbo->region = NULL;
@@ -462,9 +467,11 @@ intel_region_release_pbo(struct intel_context *intel,
dri_bo_unreference(region->buffer);
region->buffer = NULL;
- region->buffer = dri_bo_alloc(intel->bufmgr, "region",
- region->pitch * region->cpp * region->height,
- 64);
+ region->buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+ region->width, region->height,
+ region->cpp, &tiling, &stride, 0);
+ dri_bo_get_tiling(region->buffer, &region->tiling, &region->bit_6_swizzle);
+ region->pitch = stride / region->cpp;
}
/* Break the COW tie to the pbo. Both the pbo and the region end up
@@ -602,7 +602,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
GLboolean gem_supported;
struct drm_i915_getparam gp;
__DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
- int num_fences = 0;
+ int num_fences = 0, has_exec2 = 0;
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
@@ -658,6 +658,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
else
intelScreen->kernel_exec_fencing = GL_FALSE;
+ if (intel_get_param(spriv, I915_PARAM_HAS_EXECBUF2, &has_exec2) &&
+ has_exec2)
+ drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+
return GL_TRUE;
}