From patchwork Tue Sep 15 18:29:35 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jesse Barnes X-Patchwork-Id: 47717 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n8FITfcT010685 for ; Tue, 15 Sep 2009 18:29:41 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 78A919E8D0; Tue, 15 Sep 2009 11:29:41 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from outbound-mail-147.bluehost.com (outbound-mail-147.bluehost.com [67.222.38.37]) by gabe.freedesktop.org (Postfix) with SMTP id 945129E76F for ; Tue, 15 Sep 2009 11:29:38 -0700 (PDT) Received: (qmail 18285 invoked by uid 0); 15 Sep 2009 18:29:38 -0000 Received: from unknown (HELO box514.bluehost.com) (74.220.219.114) by outboundproxy5.bluehost.com with SMTP; 15 Sep 2009 18:29:38 -0000 Received: from [75.111.28.251] (helo=jbarnes-g45) by box514.bluehost.com with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.69) (envelope-from ) id 1MncmY-0005b1-JT; Tue, 15 Sep 2009 12:29:38 -0600 Date: Tue, 15 Sep 2009 11:29:35 -0700 From: Jesse Barnes To: Eric Anholt Message-ID: <20090915112935.787d64c1@jbarnes-g45> In-Reply-To: <1249802129.6701.36.camel@gaiman.anholt.net> References: <20090714134550.406ab5c1@jbarnes-g45> <20090714135503.513fdb49@jbarnes-g45> <1249802129.6701.36.camel@gaiman.anholt.net> X-Mailer: Claws Mail 3.7.2 (GTK+ 2.17.5; i486-pc-linux-gnu) Mime-Version: 1.0 X-Identified-User: {10642:box514.bluehost.com:virtuous:virtuousgeek.org} {sentby:smtp auth 75.111.28.251 authed with jbarnes@virtuousgeek.org} Cc: intel-gfx@lists.freedesktop.org Subject: Re: [Intel-gfx] [PATCH] mesa/intel: support tiled textures on pre-965 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.9 Precedence: list List-Id: Intel graphics driver community testing 
& development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces@lists.freedesktop.org Errors-To: intel-gfx-bounces@lists.freedesktop.org On Sun, 09 Aug 2009 00:15:29 -0700 Eric Anholt wrote: > On Tue, 2009-07-14 at 13:55 -0700, Jesse Barnes wrote: > > This patch adds support for tiled textures on pre-965 chips. It > > uses the new libdrm tiled allocation function and the new fence > > register relocation type to specify buffer properties and rendering > > requirements. > > > > This one currently causes some rendering errors, but is otherwise > > stable. Feedback appreciated. I also noticed one other place we > > might use tiled objects, intel_bufferobj_alloc_buffer(), but didn't > > make the conversion here. > > > > Signed-off-by: Jesse Barnes Updated with your feedback. diff --git a/src/mesa/drivers/dri/i915/i915_metaops.c b/src/mesa/drivers/dri/i915/i915_metaops.c index 90a78c6..dae292f 100644 --- a/src/mesa/drivers/dri/i915/i915_metaops.c +++ b/src/mesa/drivers/dri/i915/i915_metaops.c @@ -334,6 +334,7 @@ meta_tex_rect_source(struct intel_context *intel, GLuint *state = i915->meta.Tex[0]; GLuint textureFormat; GLuint cpp; + uint32_t tiling, swizzle; /* A full implementation of this would do the upload through * glTexImage2d, and get all the conversion operations at that @@ -398,10 +399,16 @@ meta_tex_rect_source(struct intel_context *intel, /* intel_region_reference(&i915->meta.tex_region[0], region); */ i915->meta.tex_buffer[0] = buffer; i915->meta.tex_offset[0] = offset; + drm_intel_bo_get_tiling(buffer, &tiling, &swizzle); state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) | ((pitch - 1) << MS3_WIDTH_SHIFT) | - textureFormat | MS3_USE_FENCE_REGS); + textureFormat); + if (tiling != I915_TILING_NONE) { + state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; + if (tiling == I915_TILING_Y) + state[I915_TEXREG_MS3] |= MS3_TILE_WALK; + } state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) | 
MS4_CUBE_FACE_ENA_MASK | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 6aa36d1..8fc671f 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -294,6 +294,32 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, return GL_TRUE; } +GLboolean +intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map, + buffer, delta, read_domains, + write_domain); + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + void intel_batchbuffer_data(struct intel_batchbuffer *batch, const void *data, GLuint bytes, diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 51579df..baf5c05 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -95,6 +95,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +GLboolean intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. 
Certainly better off switching all command packets to @@ -162,6 +167,13 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, read_domains, write_domain, delta); \ } while (0) +#define OUT_RELOC_FENCE(buf, read_domains, write_domain, delta) do { \ + assert((delta) >= 0); \ + intel_batchbuffer_emit_fenced_reloc(intel->batch, buf, \ + read_domains, write_domain, \ + delta); \ +} while (0) + #define ADVANCE_BATCH() do { \ unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ assert(intel->batch->emit.start_ptr != NULL); \ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 43141c5..6a326a5 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -153,14 +153,20 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + if (dst->tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(dst->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + else + OUT_RELOC(dst->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, - I915_GEM_DOMAIN_RENDER, 0, - 0); + if (src->tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0); + else + OUT_RELOC(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0); ADVANCE_BATCH(); } @@ -318,14 +324,23 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + if (dst_tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + else + OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 
dst_offset); + OUT_BATCH((src_y << 16) | src_x); OUT_BATCH((uint16_t)src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + if (src_tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, + src_offset); + else + OUT_RELOC(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset); + ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -531,9 +546,16 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - irb->region->draw_offset); + if (irb->region->tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(write_buffer, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + irb->region->draw_offset); + else + OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + irb->region->draw_offset); + OUT_BATCH(clearVal); ADVANCE_BATCH(); clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ @@ -612,9 +634,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + if (dst_tiling != I915_TILING_NONE) + OUT_RELOC_FENCE(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + else + OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 804c034..f9ccaba 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -37,6 +37,7 @@ #include "main/texrender.h" #include "drivers/common/meta.h" +#include "intel_chipset.h" #include "intel_context.h" #include 
"intel_buffers.h" #include "intel_fbo.h" @@ -106,6 +107,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, { struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); + uint32_t tiling_mode = I915_TILING_NONE; GLboolean softwareBuffer = GL_FALSE; int cpp; @@ -218,9 +220,19 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - irb->region = intel_region_alloc(intel, I915_TILING_NONE, - cpp, width, height, pitch, - GL_TRUE); + if (intel->use_texture_tiling && + intel->intelScreen->kernel_exec_fencing) { + if (IS_965(intel->intelScreen->deviceID) && + (internalFormat == GL_DEPTH_COMPONENT || + internalFormat == GL_DEPTH_STENCIL_EXT)) + tiling_mode = I915_TILING_Y; + else + tiling_mode = I915_TILING_X; + } else + tiling_mode = I915_TILING_NONE; + + irb->region = intel_region_alloc(intel, tiling_mode, cpp, width, height, + pitch, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? 
*/ diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index c985da5..83b6514 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -161,12 +161,24 @@ intel_miptree_create_for_region(struct intel_context *intel, GLuint compress_byte) { struct intel_mipmap_tree *mt; + uint32_t tiling = I915_TILING_NONE; + + if (intel->use_texture_tiling && compress_byte == 0 && + intel->intelScreen->kernel_exec_fencing) { + if (IS_965(intel->intelScreen->deviceID) && + (internal_format == GL_DEPTH_COMPONENT || + internal_format == GL_DEPTH_STENCIL_EXT)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } else + tiling = I915_TILING_NONE; mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, region->width, region->height, 1, region->cpp, compress_byte, - I915_TILING_NONE); + tiling); if (!mt) return mt; #if 0 diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index a86c66a..8ced450 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -105,7 +105,6 @@ debug_backtrace(void) #endif - /* XXX: Thread safety? */ GLubyte * @@ -180,6 +179,8 @@ intel_region_alloc(struct intel_context *intel, { dri_bo *buffer; struct intel_region *region; + unsigned long flags, stride; + int ret; /* If we're tiled, our allocations are in 8 or 32-row blocks, so * failure to align our height means that we won't allocate enough pages. 
@@ -203,21 +204,22 @@ intel_region_alloc(struct intel_context *intel, */ height = ALIGN(height, 2); - if (expect_accelerated_upload) { - buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", - pitch * cpp * height, 64); - } else { - buffer = drm_intel_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64); - } + if (expect_accelerated_upload) + flags = BO_ALLOC_FOR_RENDER; - region = intel_region_alloc_internal(intel, cpp, width, height, - pitch, buffer); + buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height, + cpp, &tiling, &stride, flags); + if (!buffer) + return NULL; - if (tiling != I915_TILING_NONE) { - assert(((pitch * cpp) & 127) == 0); - drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp); - drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle); + pitch = stride / cpp; + region = intel_region_alloc_internal(intel, cpp, width, height, pitch, + buffer); + ret = drm_intel_bo_get_tiling(buffer, &region->tiling, + &region->bit_6_swizzle); + if (ret != 0) { + intel_region_release(&region); + return NULL; } return region; @@ -235,8 +237,8 @@ intel_region_alloc_for_handle(struct intel_context *intel, buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle); - region = intel_region_alloc_internal(intel, cpp, - width, height, pitch, buffer); + region = intel_region_alloc_internal(intel, cpp, width, height, pitch, + buffer); if (region == NULL) return region; @@ -455,6 +457,9 @@ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { + uint32_t tiling = I915_TILING_X; + unsigned long stride; + _DBG("%s %p\n", __FUNCTION__, region); assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; @@ -462,9 +467,11 @@ intel_region_release_pbo(struct intel_context *intel, dri_bo_unreference(region->buffer); region->buffer = NULL; - region->buffer = dri_bo_alloc(intel->bufmgr, "region", - region->pitch * region->cpp * region->height, - 64); + region->buffer = 
drm_intel_bo_alloc_tiled(intel->bufmgr, "region", + region->width, region->height, + region->cpp, &tiling, &stride, 0); + dri_bo_get_tiling(region->buffer, &region->tiling, &region->bit_6_swizzle); + region->pitch = stride / region->cpp; } /* Break the COW tie to the pbo. Both the pbo and the region end up diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 1b8c56e..0b55dd3 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -602,7 +602,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) GLboolean gem_supported; struct drm_i915_getparam gp; __DRIscreenPrivate *spriv = intelScreen->driScrnPriv; - int num_fences = 0; + int num_fences = 0, has_exec2 = 0; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; @@ -658,6 +658,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) else intelScreen->kernel_exec_fencing = GL_FALSE; + if (intel_get_param(spriv, I915_PARAM_HAS_EXECBUF2, &has_exec2) && + has_exec2) + drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr); + return GL_TRUE; }