[v5,1/6] drm/i915/gen8: Add infrastructure to initialize WA batch buffers

Message ID	1434632855-7080-2-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Arun Siluvery <arun.siluvery@linux.intel.com> To: intel-gfx@lists.freedesktop.org Date: Thu, 18 Jun 2015 14:07:30 +0100 Message-Id: <1434632855-7080-2-git-send-email-arun.siluvery@linux.intel.com> In-Reply-To: <1434632855-7080-1-git-send-email-arun.siluvery@linux.intel.com> References: <1434632855-7080-1-git-send-email-arun.siluvery@linux.intel.com> Subject: [Intel-gfx] [PATCH v5 1/6] drm/i915/gen8: Add infrastructure to initialize WA batch buffers Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0413b8f..ad0b189 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -211,6 +211,7 @@ enum {
 	FAULT_AND_CONTINUE /* Unsupported */
 };
 #define GEN8_CTX_ID_SHIFT 32
+#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
 
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx);
@@ -1077,6 +1078,179 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 	return 0;
 }
 
+/*
+ * Append one dword to a WA batch. Uses the caller's local "index" and makes
+ * the calling function return -ENOSPC once the page is full.
+ */
+#define wa_ctx_emit(batch, cmd) do {	\
+		if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
+			return -ENOSPC;	\
+		}	\
+		(batch)[index++] = (cmd);	\
+	} while (0)
+
+/**
+ * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
+ *
+ * @ring: only applicable for RCS
+ * @wa_ctx_batch: page in which WA are loaded
+ * @offset: This is for future use in case we would like to have multiple
+ *          batches at different offsets and select them based on a criteria.
+ * @num_dwords: returns the number of DWORDS written. This batch does not
+ * contain MI_BATCH_BUFFER_END; the caller rounds its size up to a whole
+ * cacheline. MI_BATCH_BUFFER_END is added to the perctx batch and both of
+ * them together make a complete batch buffer.
+ *
+ * Return: -ENOSPC if we exceed the PAGE_SIZE limit, 0 otherwise.
+ */
+
+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
+				    uint32_t **wa_ctx_batch,
+				    uint32_t offset,
+				    uint32_t *num_dwords)
+{
+	uint32_t index;
+	uint32_t *batch = *wa_ctx_batch;
+
+	index = offset;
+
+	/* FIXME: fill one cacheline with NOOPs.
+	 * Replace these instructions with WA
+	 */
+	while (index < (offset + 16))
+		wa_ctx_emit(batch, MI_NOOP);
+
+	/*
+	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+	 * execution depends on the length specified in terms of cache lines
+	 * in the register CTX_RCS_INDIRECT_CTX
+	 */
+
+	*num_dwords = index - offset;
+
+	return 0;
+}
+
+/*
+ * gen8_init_perctx_bb() - initialize per ctx batch with WA
+ *
+ * This function doesn't add any padding at the end as it contains
+ * MI_BATCH_BUFFER_END and padding after it is redundant.
+ */
+static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
+			       uint32_t **wa_ctx_batch,
+			       uint32_t offset,
+			       uint32_t *num_dwords)
+{
+	uint32_t index;
+	uint32_t *batch = *wa_ctx_batch;
+
+	index = offset;
+
+	/* FIXME: fill one cacheline with NOOPs.
+	 * Replace these instructions with WA
+	 */
+	while (index < (offset + 16))
+		wa_ctx_emit(batch, MI_NOOP);
+
+	batch[index - 1] = MI_BATCH_BUFFER_END;
+
+	*num_dwords = index - offset;
+
+	return 0;
+}
+
+static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
+{
+	int ret;
+
+	WARN_ON(ring->id != RCS);
+
+	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
+	if (!ring->wa_ctx.obj) {
+		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
+		return -ENOMEM;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
+				 ret);
+		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+		/* cleanup paths test this pointer; don't leave it dangling */
+		ring->wa_ctx.obj = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
+{
+	WARN_ON(ring->id != RCS);
+
+	i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
+	drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+	ring->wa_ctx.obj = NULL;
+}
+
+static int intel_init_workaround_bb(struct intel_engine_cs *ring)
+{
+	int ret = 0;
+	uint32_t *batch;
+	uint32_t num_dwords;
+	struct page *page;
+	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
+
+	WARN_ON(ring->id != RCS);
+
+	if (ring->scratch.obj == NULL) {
+		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
+		return -EINVAL;
+	}
+
+	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+		return ret;
+	}
+
+	page = i915_gem_object_get_page(wa_ctx->obj, 0);
+	batch = kmap_atomic(page);
+
+	if (INTEL_INFO(ring->dev)->gen == 8) {
+		wa_ctx->indctx_batch_offset = 0;
+
+		ret = gen8_init_indirectctx_bb(ring,
+					       &batch,
+					       wa_ctx->indctx_batch_offset,
+					       &num_dwords);
+		if (ret)
+			goto out;
+
+		wa_ctx->indctx_batch_size = round_up(num_dwords, CACHELINE_DWORDS);
+		wa_ctx->perctx_batch_offset = wa_ctx->indctx_batch_size;
+
+		ret = gen8_init_perctx_bb(ring,
+					  &batch,
+					  wa_ctx->perctx_batch_offset,
+					  &num_dwords);
+		if (ret)
+			goto out;
+	} else {
+		WARN(INTEL_INFO(ring->dev)->gen >= 8,
+		     "WA batch buffer is not initialized for Gen%d\n",
+		     INTEL_INFO(ring->dev)->gen);
+	}
+
+out:
+	kunmap_atomic(batch);
+	if (ret)
+		lrc_destroy_wa_ctx_obj(ring);
+
+	return ret;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -1411,6 +1585,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 		kunmap(sg_page(ring->status_page.obj->pages->sgl));
 		ring->status_page.obj = NULL;
 	}
+
+	if (ring->wa_ctx.obj)
+		lrc_destroy_wa_ctx_obj(ring);
 }
 
 static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
@@ -1474,7 +1651,21 @@ static int logical_render_ring_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	return intel_init_pipe_control(ring);
+	if (INTEL_INFO(ring->dev)->gen >= 8) {
+		ret = intel_init_workaround_bb(ring);
+		if (ret) {
+			DRM_ERROR("WA batch buffers are not initialized: %d\n",
+				  ret);
+		}
+	}
+
+	ret = intel_init_pipe_control(ring);
+	if (ret) {
+		if (ring->wa_ctx.obj)
+			lrc_destroy_wa_ctx_obj(ring);
+	}
+
+	return ret;
 }
 
 static int logical_bsd_ring_init(struct drm_device *dev)
@@ -1754,15 +1945,26 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
 	reg_state[CTX_SECOND_BB_STATE+1] = 0;
 	if (ring->id == RCS) {
-		/* TODO: according to BSpec, the register state context
-		 * for CHV does not have these. OTOH, these registers do
-		 * exist in CHV. I'm waiting for a clarification */
 		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
 		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
 		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+		if (ring->wa_ctx.obj) {
+			reg_state[CTX_RCS_INDIRECT_CTX+1] =
+				(i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+				 ring->wa_ctx.indctx_batch_offset * sizeof(uint32_t)) |
+				(ring->wa_ctx.indctx_batch_size / CACHELINE_DWORDS);
+
+			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
+				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+
+			reg_state[CTX_BB_PER_CTX_PTR+1] =
+				(i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+				 ring->wa_ctx.perctx_batch_offset * sizeof(uint32_t)) |
+				0x01;
+		}
 	}
 	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
 	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39f6dfc..1f38af3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -12,6 +12,7 @@
  * workarounds!
  */
 #define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
 
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
@@ -119,6 +120,22 @@ struct intel_ringbuffer {
 
 struct intel_context;
 
+/*
+ * we use a single page to load ctx workarounds so all of these
+ * values are referred in terms of dwords
+ *
+ * offset field - helpful in case if we want to have multiple batches
+ * at different offsets based on some conditions. It is not a requirement
+ * at the moment but provides an option for future use.
+ * indctx_batch_size - HW expects this value in terms of cachelines
+ */
+struct i915_ctx_workarounds {
+	u32 indctx_batch_offset;
+	u32 indctx_batch_size;
+	u32 perctx_batch_offset;
+	struct drm_i915_gem_object *obj;
+};
+
 struct intel_engine_cs {
 	const char	*name;
 	enum intel_ring_id {
@@ -142,6 +159,7 @@ struct intel_engine_cs {
 	struct i915_gem_batch_pool batch_pool;
 
 	struct intel_hw_status_page status_page;
+	struct i915_ctx_workarounds wa_ctx;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */

[v5,1/6] drm/i915/gen8: Add infrastructure to initialize WA batch buffers

Commit Message

Comments

Patch