@@ -31,6 +31,7 @@
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
struct i915_guc_client {
+ spinlock_t wq_lock;
struct drm_i915_gem_object *client_obj;
u32 priority;
off_t doorbell_offset;
@@ -39,6 +40,8 @@ struct i915_guc_client {
uint16_t doorbell_id;
uint32_t ctx_index;
uint32_t wq_size;
+ uint32_t wq_tail;
+ uint32_t cookie;
};
#define I915_MAX_DOORBELLS 256
@@ -22,6 +22,7 @@
*
*/
#include <linux/firmware.h>
+#include <linux/circ_buf.h>
#include "i915_drv.h"
#include "intel_guc.h"
@@ -52,6 +53,14 @@
* Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
* mapped into process space.
*
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents an in-order queue. The kernel driver packs the ring tail
+ * pointer and an ELSP context descriptor dword into each work item.
+ * See add_workqueue_item().
+ *
*/
/*
@@ -395,6 +404,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority)
/* XXX: evict a doorbell instead */
goto err;
+ spin_lock_init(&client->wq_lock);
+
init_ctx_desc(guc, client);
init_proc_desc(guc, client);
init_doorbell(guc, client);
@@ -414,6 +425,183 @@ err:
return NULL;
}
+/*
+ * Reserve space for one work item in the client's workqueue; on success
+ * the byte offset of the reserved slot is returned via *offset.
+ *
+ * CIRC_SPACE() is evaluated between the driver-owned tail (gc->wq_tail)
+ * and the GuC-owned head (desc->head); we poll for up to ~200ms waiting
+ * for the GuC to consume enough items. On success gc->wq_tail is advanced
+ * past the reserved slot, wrapping via the mask (wq_size is a power of
+ * two: GUC_WQ_SIZE == PAGE_SIZE * 2, so masking wraps correctly).
+ *
+ * Returns 0 on success, or the timeout error from the last wait attempt.
+ */
+static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+{
+	struct guc_process_desc *desc;
+	void *base;
+	u32 size = sizeof(struct guc_wq_item);
+	int ret = 0, timeout_counter = 200;
+	unsigned long flags;
+
+	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+	desc = base + gc->proc_desc_offset;
+
+	while (timeout_counter-- > 0) {
+		spin_lock_irqsave(&gc->wq_lock, flags);
+
+		/* desc->head is advanced by the GuC as it consumes items */
+		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
+				gc->wq_size) >= size, 1);
+
+		if (!ret) {
+			*offset = gc->wq_tail;
+
+			/* advance the tail for next workqueue item */
+			gc->wq_tail += size;
+			gc->wq_tail &= gc->wq_size - 1;
+
+			/* this will break the loop */
+			timeout_counter = 0;
+		}
+
+		spin_unlock_irqrestore(&gc->wq_lock, flags);
+	}
+
+	kunmap_atomic(base);
+
+	return ret;
+}
+
+/*
+ * Refresh fields of the saved context image that may have gone stale:
+ * the ring buffer's GGTT start address and, for true PPGTT, the PDP root
+ * pointers. The register state lives in page 1 of the context object
+ * (see the CTX_RING_BUFFER_START indexing below).
+ */
+static void guc_update_context(struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+ struct page *page;
+ uint32_t *reg_state;
+
+ page = i915_gem_object_get_page(ctx_obj, 1);
+ reg_state = kmap_atomic(page);
+
+ /* +1: the dword after the register offset holds the value */
+ reg_state[CTX_RING_BUFFER_START + 1] =
+ i915_gem_obj_ggtt_offset(ringbuf->obj);
+
+ /* True PPGTT with dynamic page allocation: update PDP registers and
+ * point the unallocated PDPs to the scratch page
+ */
+ if (ctx->ppgtt) {
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 3);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 2);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 1);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 0);
+ }
+
+ kunmap_atomic(reg_state);
+}
+
+/*
+ * Build one WQ_TYPE_INORDER work item for (ctx, ring) and write it into
+ * the client's workqueue. The item packs the engine id, the context
+ * descriptor and the ring tail (in qwords) for the GuC to submit.
+ *
+ * Returns 0 on success, or the error from get_workqueue_space() if no
+ * slot became free in time. The doorbell is NOT rung here; the caller
+ * does that separately (see i915_guc_client_submit()).
+ */
+static int add_workqueue_item(struct i915_guc_client *gc,
+ struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_gem_object *ctx_obj;
+ struct guc_wq_item *wqi;
+ void *base;
+ u32 wq_off = 0, tail = ringbuf->tail, wq_len;
+ int ret;
+
+ ctx_obj = ctx->engine[ring->id].state;
+
+ /* Need this because of the deferred pin ctx and ring */
+ /* Shall we move this right after ring is pinned? */
+ guc_update_context(ctx, ring);
+
+ ret = get_workqueue_space(gc, &wq_off);
+ if (ret)
+ return ret;
+
+ /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
+ * should not have the case where structure wqi is across page, neither
+ * wrapped to the beginning. This simplifies the implementation below.
+ *
+ * XXX: if not the case, we need save data to a temp wqi and copy it to
+ * workqueue buffer dw by dw.
+ */
+ WARN_ON(sizeof(struct guc_wq_item) != 16);
+ WARN_ON(wq_off & 3);
+
+ /* wq starts from the page after doorbell / process_desc */
+ base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
+ (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+ wq_off &= PAGE_SIZE - 1;
+ wqi = (struct guc_wq_item *)((char *)base + wq_off);
+
+ /* len does not include the header */
+ wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+ wqi->header = WQ_TYPE_INORDER |
+ (wq_len << WQ_LEN_SHIFT) |
+ (ring->id << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+
+ /* only the low 32 bits of the descriptor go into the work item */
+ wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj);
+ /* tail index is in qw */
+ tail >>= 3;
+ wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+ wqi->fence_id = 0; /*XXX: what fence to be here */
+
+ kunmap_atomic(base);
+
+ return 0;
+}
+
+/*
+ * Ring the client's doorbell to tell the GuC that new work-queue items
+ * are available.
+ *
+ * The doorbell cacheline holds a qword of {db_status, cookie}. We publish
+ * the new tail in the process descriptor, then advance the cookie (it must
+ * never be 0) with a cmpxchg; if the GuC changed the cookie under us, we
+ * retry once with the value it returned.
+ *
+ * Returns 0 if the doorbell was rung, -EAGAIN otherwise.
+ */
+static int ring_doorbell(struct i915_guc_client *gc)
+{
+	struct guc_process_desc *desc;
+	union guc_doorbell_qw db_cmp, db_exc, db_ret;
+	union guc_doorbell_qw *db;
+	void *base;
+	int attempt = 2, ret = -EAGAIN;
+
+	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+	desc = base + gc->proc_desc_offset;
+
+	/* Update the tail so it is visible to GuC */
+	desc->tail = gc->wq_tail;
+
+	/* current cookie */
+	db_cmp.db_status = GUC_DOORBELL_ENABLED;
+	db_cmp.cookie = gc->cookie;
+
+	/* cookie to be updated */
+	db_exc.db_status = GUC_DOORBELL_ENABLED;
+	db_exc.cookie = gc->cookie + 1;
+	if (db_exc.cookie == 0)
+		db_exc.cookie = 1;
+
+	/* pointer of current doorbell cacheline */
+	db = base + gc->doorbell_offset;
+
+	while (attempt--) {
+		/* lets ring the doorbell */
+		db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+			db_cmp.value_qw, db_exc.value_qw);
+
+		/* if the exchange was successfully executed */
+		if (db_ret.value_qw == db_cmp.value_qw) {
+			/* db was successfully rung */
+			gc->cookie = db_exc.cookie;
+			ret = 0;
+			break;
+		}
+
+		/* XXX: doorbell was lost and need to acquire it again */
+		if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+			break;
+
+		/* cookies are unsigned (gc->cookie is u32): print with %u */
+		DRM_ERROR("Cookie mismatch. Expected %u, returned %u\n",
+			  db_cmp.cookie, db_ret.cookie);
+
+		/* update the cookie to newly read cookie from GuC */
+		db_cmp.cookie = db_ret.cookie;
+		db_exc.cookie = db_ret.cookie + 1;
+		if (db_exc.cookie == 0)
+			db_exc.cookie = 1;
+	}
+
+	kunmap_atomic(base);
+	return ret;
+}
+
/**
* i915_guc_client_submit() - Submit commands through GuC
* @client: the guc client where commands will go through
@@ -426,5 +614,12 @@ int i915_guc_client_submit(struct i915_guc_client *client,
 			   struct intel_context *ctx,
 			   struct intel_engine_cs *ring)
 {
-	return 0;
+	int ret;
+
+	/* Stage the work item first; only ring the doorbell on success */
+	ret = add_workqueue_item(client, ctx, ring);
+	if (ret)
+		return ret;
+
+	return ring_doorbell(client);
 }
@@ -187,8 +187,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
return lrca >> 12;
}
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
- struct drm_i915_gem_object *ctx_obj)
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+ struct drm_i915_gem_object *ctx_obj)
{
struct drm_device *dev = ring->dev;
uint64_t desc;
@@ -648,13 +648,17 @@ intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
struct drm_i915_gem_request *request)
{
struct intel_engine_cs *ring = ringbuf->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
intel_logical_ring_advance(ringbuf);
if (intel_ring_stopped(ring))
return;
- execlists_context_queue(ring, ctx, ringbuf->tail, request);
+ if (dev_priv->guc.execbuf_client)
+ i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring);
+ else
+ execlists_context_queue(ring, ctx, ringbuf->tail, request);
}
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
@@ -918,18 +922,23 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret = 0;
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
if (ctx->engine[ring->id].pin_count++ == 0) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj,
- GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret)
goto reset_pin_count;
ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
if (ret)
goto unpin_ctx_obj;
+
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
return ret;
@@ -1283,6 +1292,13 @@ static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
ret = __i915_add_request(ring, file, so.obj);
/* intel_logical_ring_add_request moves object to inactive if it
* fails */
+
+ /* The GuC firmware will try to collapse its DPC work queue if the new one
+ * is for the same context. So the following breadcrumb could be appended
+ * to this batch and submitted as one batch. Wait here to make sure the
+ * context state init is finished before any other submission to the GuC. */
+ if (!ret && i915.enable_guc_scheduling)
+ ret = i915_wait_request(so.obj->last_read_req);
out:
i915_gem_render_state_fini(&so);
return ret;
@@ -1291,8 +1307,13 @@ out:
static int gen8_init_rcs_context(struct intel_engine_cs *ring,
struct intel_context *ctx)
{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret;
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
ret = intel_logical_ring_workarounds_emit(ring, ctx);
if (ret)
return ret;
@@ -1819,7 +1840,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
}
if (is_global_default_ctx) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret) {
DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
ret);
@@ -85,6 +85,8 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
struct drm_i915_gem_object *batch_obj,
u64 exec_start, u32 dispatch_flags);
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+ struct drm_i915_gem_object *ctx_obj);
void intel_lrc_irq_handler(struct intel_engine_cs *ring);
void intel_execlists_retire_requests(struct intel_engine_cs *ring);